xref: /xnu-8792.81.2/osfmk/kern/task.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_FREE_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/task.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59  *		David Black
60  *
61  *	Task management primitives implementation.
62  */
63 /*
64  * Copyright (c) 1993 The University of Utah and
65  * the Computer Systems Laboratory (CSL).  All rights reserved.
66  *
67  * Permission to use, copy, modify and distribute this software and its
68  * documentation is hereby granted, provided that both the copyright
69  * notice and this permission notice appear in all copies of the
70  * software, derivative works or modified versions, and any portions
71  * thereof, and that both notices appear in supporting documentation.
72  *
73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76  *
77  * CSL requests users of this software to return to [email protected] any
78  * improvements that they make and grant CSL redistribution rights.
79  *
80  */
81 /*
82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83  * support for mandatory and extensible security protections.  This notice
84  * is included in support of clause 2.2 (b) of the Apple Public License,
85  * Version 2.0.
86  * Copyright (c) 2005 SPARTA, Inc.
87  */
88 
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100 #include <mach/mach_test_upcall.h>
101 
102 #include <ipc/ipc_importance.h>
103 #include <ipc/ipc_types.h>
104 #include <ipc/ipc_space.h>
105 #include <ipc/ipc_entry.h>
106 #include <ipc/ipc_hash.h>
107 #include <ipc/ipc_init.h>
108 
109 #include <kern/kern_types.h>
110 #include <kern/mach_param.h>
111 #include <kern/misc_protos.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/coalition.h>
115 #include <kern/zalloc.h>
116 #include <kern/kalloc.h>
117 #include <kern/kern_cdata.h>
118 #include <kern/processor.h>
119 #include <kern/recount.h>
120 #include <kern/sched_prim.h>    /* for thread_wakeup */
121 #include <kern/ipc_tt.h>
122 #include <kern/host.h>
123 #include <kern/clock.h>
124 #include <kern/timer.h>
125 #include <kern/assert.h>
126 #include <kern/affinity.h>
127 #include <kern/exc_resource.h>
128 #include <kern/machine.h>
129 #include <kern/policy_internal.h>
130 #include <kern/restartable.h>
131 #include <kern/ipc_kobject.h>
132 
133 #include <corpses/task_corpse.h>
134 #if CONFIG_TELEMETRY
135 #include <kern/telemetry.h>
136 #endif
137 
138 #if CONFIG_PERVASIVE_CPI
139 #include <kern/monotonic.h>
140 #include <machine/monotonic.h>
141 #endif /* CONFIG_PERVASIVE_CPI */
142 
143 #include <os/log.h>
144 
145 #include <vm/pmap.h>
146 #include <vm/vm_map.h>
147 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
148 #include <vm/vm_pageout.h>
149 #include <vm/vm_protos.h>
150 #include <vm/vm_purgeable_internal.h>
151 #include <vm/vm_compressor_pager.h>
152 #include <vm/vm_reclaim_internal.h>
153 
154 #include <sys/proc_ro.h>
155 #include <sys/resource.h>
156 #include <sys/signalvar.h> /* for coredump */
157 #include <sys/bsdtask_info.h>
158 /*
159  * Exported interfaces
160  */
161 
162 #include <mach/task_server.h>
163 #include <mach/mach_host_server.h>
164 #include <mach/mach_port_server.h>
165 
166 #include <vm/vm_shared_region.h>
167 
168 #include <libkern/OSDebug.h>
169 #include <libkern/OSAtomic.h>
170 #include <libkern/section_keywords.h>
171 
172 #include <mach-o/loader.h>
173 #include <kdp/kdp_dyld.h>
174 
175 #include <kern/sfi.h>           /* picks up ledger.h */
176 
177 #if CONFIG_MACF
178 #include <security/mac_mach_internal.h>
179 #endif
180 
181 #include <IOKit/IOBSD.h>
182 #include <kdp/processor_core.h>
183 
184 #if KPERF
185 extern int kpc_force_all_ctrs(task_t, int);
186 #endif
187 
188 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
189 
190 int64_t         next_taskuniqueid = 0;
191 const size_t task_alignment = _Alignof(struct task);
192 extern const size_t proc_alignment;
193 extern size_t proc_struct_size;
194 extern size_t proc_and_task_size;
195 size_t task_struct_size;
196 
197 extern uint32_t ipc_control_port_options;
198 
199 extern int large_corpse_count;
200 
201 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
202 extern void task_disown_frozen_csegs(task_t owner_task);
203 
204 static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
205 static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
206 static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
207 static inline void task_zone_init(void);
208 
209 IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
210 IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
211     .iko_op_no_senders = task_port_no_senders);
212 IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
213     .iko_op_no_senders = task_port_with_flavor_no_senders);
214 IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
215     .iko_op_no_senders = task_port_with_flavor_no_senders);
216 IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
217     .iko_op_no_senders = task_suspension_no_senders);
218 
219 #if CONFIG_PROC_RESOURCE_LIMITS
220 static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
221 static mach_port_t task_allocate_fatal_port(void);
222 
223 IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
224     .iko_op_stable     = true,
225     .iko_op_no_senders = task_fatal_port_no_senders);
226 
227 extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
228 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
229 
230 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
231 int audio_active = 0;
232 
233 /*
234  *	structure for tracking zone usage
235  *	Used either one per task/thread for all zones or <per-task,per-zone>.
236  */
237 typedef struct zinfo_usage_store_t {
238 	/* These fields may be updated atomically, and so must be 8 byte aligned */
239 	uint64_t        alloc __attribute__((aligned(8)));              /* allocation counter */
240 	uint64_t        free __attribute__((aligned(8)));               /* free counter */
241 } zinfo_usage_store_t;
242 
243 zinfo_usage_store_t tasks_tkm_private;
244 zinfo_usage_store_t tasks_tkm_shared;
245 
246 /* A container to accumulate statistics for expired tasks */
247 expired_task_statistics_t               dead_task_statistics;
248 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
249 
250 ledger_template_t task_ledger_template = NULL;
251 
252 /* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
253 LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
254 LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
255 
256 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
257 {.cpu_time = -1,
258  .tkm_private = -1,
259  .tkm_shared = -1,
260  .phys_mem = -1,
261  .wired_mem = -1,
262  .internal = -1,
263  .iokit_mapped = -1,
264  .external = -1,
265  .reusable = -1,
266  .alternate_accounting = -1,
267  .alternate_accounting_compressed = -1,
268  .page_table = -1,
269  .phys_footprint = -1,
270  .internal_compressed = -1,
271  .purgeable_volatile = -1,
272  .purgeable_nonvolatile = -1,
273  .purgeable_volatile_compressed = -1,
274  .purgeable_nonvolatile_compressed = -1,
275  .tagged_nofootprint = -1,
276  .tagged_footprint = -1,
277  .tagged_nofootprint_compressed = -1,
278  .tagged_footprint_compressed = -1,
279  .network_volatile = -1,
280  .network_nonvolatile = -1,
281  .network_volatile_compressed = -1,
282  .network_nonvolatile_compressed = -1,
283  .media_nofootprint = -1,
284  .media_footprint = -1,
285  .media_nofootprint_compressed = -1,
286  .media_footprint_compressed = -1,
287  .graphics_nofootprint = -1,
288  .graphics_footprint = -1,
289  .graphics_nofootprint_compressed = -1,
290  .graphics_footprint_compressed = -1,
291  .neural_nofootprint = -1,
292  .neural_footprint = -1,
293  .neural_nofootprint_compressed = -1,
294  .neural_footprint_compressed = -1,
295  .platform_idle_wakeups = -1,
296  .interrupt_wakeups = -1,
297 #if CONFIG_SCHED_SFI
298  .sfi_wait_times = { 0 /* initialized at runtime */},
299 #endif /* CONFIG_SCHED_SFI */
300  .cpu_time_billed_to_me = -1,
301  .cpu_time_billed_to_others = -1,
302  .physical_writes = -1,
303  .logical_writes = -1,
304  .logical_writes_to_external = -1,
305 #if DEBUG || DEVELOPMENT
306  .pages_grabbed = -1,
307  .pages_grabbed_kern = -1,
308  .pages_grabbed_iopl = -1,
309  .pages_grabbed_upl = -1,
310 #endif
311 #if CONFIG_FREEZE
312  .frozen_to_swap = -1,
313 #endif /* CONFIG_FREEZE */
314  .energy_billed_to_me = -1,
315  .energy_billed_to_others = -1,
316 #if CONFIG_PHYS_WRITE_ACCT
317  .fs_metadata_writes = -1,
318 #endif /* CONFIG_PHYS_WRITE_ACCT */
319 #if CONFIG_MEMORYSTATUS
320  .memorystatus_dirty_time = -1,
321 #endif /* CONFIG_MEMORYSTATUS */
322  .swapins = -1, };
323 
324 /* System sleep state */
325 boolean_t tasks_suspend_state;
326 
327 
328 void init_task_ledgers(void);
329 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
330 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
331 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
332 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
333 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
334 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
335 #if CONFIG_PROC_RESOURCE_LIMITS
336 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
337 mach_port_name_t current_task_get_fatal_port_name(void);
338 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
339 
340 kern_return_t task_suspend_internal(task_t);
341 kern_return_t task_resume_internal(task_t);
342 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
343 
344 extern kern_return_t iokit_task_terminate(task_t task);
345 extern void          iokit_task_app_suspended_changed(task_t task);
346 
347 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
348 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
349 extern kern_return_t thread_resume(thread_t thread);
350 
351 extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
352 
353 // Warn tasks when they hit 80% of their memory limit.
354 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
355 
356 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
357 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
358 
359 /*
360  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
361  *
362  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
363  *  stacktraces, aka micro-stackshots)
364  */
365 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
366 
367 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
368 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
369 
370 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
371 
372 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
373 
374 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
375 unsigned int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
376 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
377 
378 /* I/O Monitor Limits */
379 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
380 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
381 
382 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
383 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
384 
385 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
386 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
387 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
388 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage*/
389 static boolean_t global_update_logical_writes(int64_t, int64_t*);
390 
391 #define TASK_MAX_THREAD_LIMIT 256
392 
393 #if MACH_ASSERT
394 int pmap_ledgers_panic = 1;
395 int pmap_ledgers_panic_leeway = 3;
396 #endif /* MACH_ASSERT */
397 
398 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
399 
400 #if CONFIG_COREDUMP
401 int hwm_user_cores = 0; /* high watermark violations generate user core files */
402 #endif
403 
404 #ifdef MACH_BSD
405 extern uint32_t proc_platform(const struct proc *);
406 extern uint32_t proc_sdk(struct proc *);
407 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
408 extern int      proc_pid(struct proc *p);
409 extern int      proc_selfpid(void);
410 extern struct proc *current_proc(void);
411 extern char     *proc_name_address(struct proc *p);
412 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
413 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
414 extern void workq_proc_suspended(struct proc *p);
415 extern void workq_proc_resumed(struct proc *p);
416 extern struct proc *kernproc;
417 
418 #if CONFIG_MEMORYSTATUS
419 extern void     proc_memstat_skip(struct proc* p, boolean_t set);
420 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
421 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
422 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
423 extern uint64_t  memorystatus_available_memory_internal(struct proc *p);
424 
425 #if DEVELOPMENT || DEBUG
426 extern void memorystatus_abort_vm_map_fork(task_t);
427 #endif
428 
429 #endif /* CONFIG_MEMORYSTATUS */
430 
431 #endif /* MACH_BSD */
432 
433 #if DEVELOPMENT || DEBUG
434 int exc_resource_threads_enabled;
435 #endif /* DEVELOPMENT || DEBUG */
436 
437 /* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
438 static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
439 
440 /*
441  * Defaults for controllable EXC_GUARD behaviors
442  *
443  * Internal builds are fatal by default (except BRIDGE).
444  * Create an alternate set of defaults for special processes by name.
445  */
446 struct task_exc_guard_named_default {
447 	char *name;
448 	uint32_t behavior;
449 };
#define _TASK_EXC_GUARD_MP_CORPSE  (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
#define _TASK_EXC_GUARD_MP_ONCE    (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
#define _TASK_EXC_GUARD_MP_FATAL   (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)

/*
 * Fix copy-paste asymmetry with the MP variants above: the VM "corpse"
 * default previously OR'ed in TASK_EXC_GUARD_VM_ONCE instead of
 * TASK_EXC_GUARD_VM_CORPSE, which made _TASK_EXC_GUARD_VM_CORPSE and
 * _TASK_EXC_GUARD_VM_ONCE expand to the same value (and left the CORPSE
 * behavior bit out of both).
 */
#define _TASK_EXC_GUARD_VM_CORPSE  (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE)
#define _TASK_EXC_GUARD_VM_ONCE    (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_VM_FATAL   (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)

#define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
#define _TASK_EXC_GUARD_ALL_ONCE   (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_ALL_FATAL  (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
461 
462 /* cannot turn off FATAL and DELIVER bit if set */
463 uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL | TASK_EXC_GUARD_VM_FATAL |
464     TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_VM_DELIVER;
465 /* cannot turn on ONCE bit if unset */
466 uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_VM_ONCE;
467 
468 #if !defined(XNU_TARGET_OS_BRIDGE)
469 
470 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
471 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
472 /*
473  * These "by-process-name" default overrides are intended to be a short-term fix to
474  * quickly get over races between changes introducing new EXC_GUARD raising behaviors
475  * in some process and a change in default behavior for same. We should ship with
476  * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
477  * exception behavior via task_set_exc_guard_behavior()).
478  *
479  * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
480  * task_exc_guard_default when transitioning this list between empty and
481  * non-empty.
482  */
483 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
484 
485 #else /* !defined(XNU_TARGET_OS_BRIDGE) */
486 
487 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
488 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
489 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
490 
491 #endif /* !defined(XNU_TARGET_OS_BRIDGE) */
492 
493 /* Forwards */
494 
495 static void task_hold_locked(task_t task);
496 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
497 static void task_release_locked(task_t task);
498 extern task_t proc_get_task_raw(void *proc);
499 extern void task_ref_hold_proc_task_struct(task_t task);
500 extern void task_release_proc_task_struct(task_t task);
501 
502 static void task_synchronizer_destroy_all(task_t task);
503 static os_ref_count_t
504 task_add_turnstile_watchports_locked(
505 	task_t                      task,
506 	struct task_watchports      *watchports,
507 	struct task_watchport_elem  **previous_elem_array,
508 	ipc_port_t                  *portwatch_ports,
509 	uint32_t                    portwatch_count);
510 
511 static os_ref_count_t
512 task_remove_turnstile_watchports_locked(
513 	task_t                 task,
514 	struct task_watchports *watchports,
515 	ipc_port_t             *port_freelist);
516 
517 static struct task_watchports *
518 task_watchports_alloc_init(
519 	task_t        task,
520 	thread_t      thread,
521 	uint32_t      count);
522 
523 static void
524 task_watchports_deallocate(
525 	struct task_watchports *watchports);
526 
527 void
/*
 * task_set_64bit:
 *
 * Set the task's 64-bit address-space flag (is_64bit) and 64-bit
 * register-state flag (is_64bit_data) under the task lock.  When the
 * register-state width actually changes, each of the task's threads has
 * its machine address mode switched to match.
 */
void
task_set_64bit(
	task_t task,
	boolean_t is_64bit,
	boolean_t is_64bit_data)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
	thread_t thread;
#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */

	task_lock(task);

	/*
	 * Switching to/from 64-bit address spaces
	 */
	if (is_64bit) {
		if (!task_has_64Bit_addr(task)) {
			task_set_64Bit_addr(task);
		}
	} else {
		if (task_has_64Bit_addr(task)) {
			task_clear_64Bit_addr(task);
		}
	}

	/*
	 * Switching to/from 64-bit register state.
	 *
	 * If the register state is already in the requested mode, jump
	 * straight to "out": the per-thread addrmode switch below is only
	 * needed when the data width actually changes.
	 */
	if (is_64bit_data) {
		if (task_has_64Bit_data(task)) {
			goto out;
		}

		task_set_64Bit_data(task);
	} else {
		if (!task_has_64Bit_data(task)) {
			goto out;
		}

		task_clear_64Bit_data(task);
	}

	/* FIXME: On x86, the thread save state flavor can diverge from the
	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
	 * state dichotomy. Since we can be pre-empted in this interval,
	 * certain routines may observe the thread as being in an inconsistent
	 * state with respect to its task's 64-bitness.
	 */

#if defined(__x86_64__) || defined(__arm64__)
	/* Resynchronize every thread's machine state with the new data width. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		machine_thread_switch_addrmode(thread);
		thread_mtx_unlock(thread);
	}
#endif /* defined(__x86_64__) || defined(__arm64__) */

out:
	task_unlock(task);
}
587 
588 bool
task_get_64bit_addr(task_t task)589 task_get_64bit_addr(task_t task)
590 {
591 	return task_has_64Bit_addr(task);
592 }
593 
594 bool
task_get_64bit_data(task_t task)595 task_get_64bit_data(task_t task)
596 {
597 	return task_has_64Bit_data(task);
598 }
599 
600 void
task_set_platform_binary(task_t task,boolean_t is_platform)601 task_set_platform_binary(
602 	task_t task,
603 	boolean_t is_platform)
604 {
605 	if (is_platform) {
606 		task_ro_flags_set(task, TFRO_PLATFORM);
607 	} else {
608 		task_ro_flags_clear(task, TFRO_PLATFORM);
609 	}
610 }
611 
612 boolean_t
task_get_platform_binary(task_t task)613 task_get_platform_binary(task_t task)
614 {
615 	return (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
616 }
617 
/*
 * Mark the task's port as immovable/pinned.
 * Thin wrapper over the IPC-layer implementation.
 */
void
task_set_immovable_pinned(task_t task)
{
	ipc_task_set_immovable_pinned(task);
}
623 
624 /*
625  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
626  * Returns "false" if flag is already set, and "true" in other cases.
627  */
628 bool
task_set_ca_client_wi(task_t task,boolean_t set_or_clear)629 task_set_ca_client_wi(
630 	task_t task,
631 	boolean_t set_or_clear)
632 {
633 	bool ret = true;
634 	task_lock(task);
635 	if (set_or_clear) {
636 		/* Tasks can have only one CA_CLIENT work interval */
637 		if (task->t_flags & TF_CA_CLIENT_WI) {
638 			ret = false;
639 		} else {
640 			task->t_flags |= TF_CA_CLIENT_WI;
641 		}
642 	} else {
643 		task->t_flags &= ~TF_CA_CLIENT_WI;
644 	}
645 	task_unlock(task);
646 	return ret;
647 }
648 
649 /*
650  * task_set_dyld_info() is called at most three times.
651  * 1) at task struct creation to set addr/size to zero.
652  * 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
653  * 3) is from dyld itself to update location of all_image_info
654  * For security any calls after that are ignored.  The TF_DYLD_ALL_IMAGE_SET bit is used to determine state.
655  */
656 kern_return_t
task_set_dyld_info(task_t task,mach_vm_address_t addr,mach_vm_size_t size)657 task_set_dyld_info(
658 	task_t            task,
659 	mach_vm_address_t addr,
660 	mach_vm_size_t    size)
661 {
662 	mach_vm_address_t end;
663 	if (os_add_overflow(addr, size, &end)) {
664 		return KERN_FAILURE;
665 	}
666 
667 	task_lock(task);
668 	/* don't accept updates if all_image_info_addr is final */
669 	if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == 0) {
670 		bool inputNonZero   = ((addr != 0) || (size != 0));
671 		bool currentNonZero = ((task->all_image_info_addr != 0) || (task->all_image_info_size != 0));
672 		task->all_image_info_addr = addr;
673 		task->all_image_info_size = size;
674 		/* can only change from a non-zero value to another non-zero once */
675 		if (inputNonZero && currentNonZero) {
676 			task->t_flags |= TF_DYLD_ALL_IMAGE_FINAL;
677 		}
678 		task_unlock(task);
679 		return KERN_SUCCESS;
680 	} else {
681 		task_unlock(task);
682 		return KERN_FAILURE;
683 	}
684 }
685 
686 bool
task_donates_own_pages(task_t task)687 task_donates_own_pages(
688 	task_t task)
689 {
690 	return task->donates_own_pages;
691 }
692 
/*
 * Record the VM address of the main executable's mach header for this
 * task, under the task lock.
 */
void
task_set_mach_header_address(
	task_t task,
	mach_vm_address_t addr)
{
	task_lock(task);
	task->mach_header_vm_address = addr;
	task_unlock(task);
}
702 
703 void
task_bank_reset(__unused task_t task)704 task_bank_reset(__unused task_t task)
705 {
706 	if (task->bank_context != NULL) {
707 		bank_task_destroy(task);
708 	}
709 }
710 
711 /*
712  * NOTE: This should only be called when the P_LINTRANSIT
713  *	 flag is set (the proc_trans lock is held) on the
714  *	 proc associated with the task.
715  */
716 void
task_bank_init(__unused task_t task)717 task_bank_init(__unused task_t task)
718 {
719 	if (task->bank_context != NULL) {
720 		panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
721 	}
722 	bank_task_initialize(task);
723 }
724 
/* Mark that this task has performed an exec (sets TPF_DID_EXEC). */
void
task_set_did_exec_flag(task_t task)
{
	task->t_procflags |= TPF_DID_EXEC;
}
730 
/* Clear the TPF_EXEC_COPY flag on this task. */
void
task_clear_exec_copy_flag(task_t task)
{
	task->t_procflags &= ~TPF_EXEC_COPY;
}
736 
/*
 * Event used to park/wake threads in task_wait_to_return() and
 * task_clear_return_wait(); keyed off the address of the task's
 * returnwait_inheritor field.
 */
event_t
task_get_return_wait_event(task_t task)
{
	return (event_t)&task->returnwait_inheritor;
}
742 
/*
 * task_clear_return_wait:
 *
 * Wake threads blocked in task_wait_to_return() for this task.
 *
 * TCRW_CLEAR_INITIAL_WAIT: plain thread_wakeup on the return-wait
 * event, with no turnstile bookkeeping.
 *
 * TCRW_CLEAR_FINAL_WAIT: under the task's IPC-space write lock, clear
 * TRW_LRETURNWAIT and drop the inheritor; if a waiter was recorded
 * (TRW_LRETURNWAITER), wake all waiters through the turnstile hash and
 * tear the turnstile down.
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		/* the IPC-space write lock serializes against task_wait_to_return() */
		is_write_lock(task->itk_space);

		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
			    TURNSTILE_ULOCK);

			/* wake every waiter and transfer/clear the turnstile inheritor */
			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
773 
/*
 * task_wait_to_return:
 *
 * Block the current thread until task_clear_return_wait() clears
 * TRW_LRETURNWAIT on its task, donating priority to the recorded
 * returnwait_inheritor through a turnstile.  Once released, notifies
 * MACF (if configured) that exec has completed, then enters userspace
 * via thread_bootstrap_return(); this function never returns to its
 * caller.
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();

	/* the IPC-space write lock guards the t_returnwaitflags handshake */
	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
		    TURNSTILE_ULOCK);

		do {
			/* advertise ourselves so the waker knows to use the turnstile */
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			/* retake the lock to re-check the flag; loop guards spurious wakeups */
			is_write_lock(task->itk_space);
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
	}

	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process to execute any code,
	 * notify any interested parties.
	 */
	mac_proc_notify_exec_complete(current_proc());
#endif

	thread_bootstrap_return();
}
820 
821 boolean_t
task_is_exec_copy(task_t task)822 task_is_exec_copy(task_t task)
823 {
824 	return task_is_exec_copy_internal(task);
825 }
826 
827 boolean_t
task_did_exec(task_t task)828 task_did_exec(task_t task)
829 {
830 	return task_did_exec_internal(task);
831 }
832 
833 boolean_t
task_is_active(task_t task)834 task_is_active(task_t task)
835 {
836 	return task->active;
837 }
838 
839 boolean_t
task_is_halting(task_t task)840 task_is_halting(task_t task)
841 {
842 	return task->halting;
843 }
844 
/*
 * task_init:
 *
 * One-time, boot-time initialization of the task subsystem:
 *   - parses boot-args / device-tree tunables (per-task footprint limit,
 *     EXC_RESOURCE knobs, wakeups monitor, I/O monitor, telemetry limit);
 *   - ensures the task ledger template exists (directly, or via
 *     coalition_init() when coalitions are configured);
 *   - sets up task reference counting and the combined proc/task zone;
 *   - allocates kernproc/kernel_task and creates the kernel task itself.
 *
 * Ordering within this function is load-bearing: ledgers and the zone
 * must exist before task_create_internal() runs, and the kernel task's
 * placeholder map is swapped for kernel_map only at the very end.
 */
void
task_init(void)
{
	/*
	 * Configure per-task memory limit.
	 * The boot-arg is interpreted as Megabytes,
	 * and takes precedence over the device tree.
	 * Setting the boot-arg to 0 disables task limits.
	 */
	if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
	    sizeof(max_task_footprint_mb))) {
		/*
		 * No limit was found in boot-args, so go look in the device tree.
		 */
		if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
		    sizeof(max_task_footprint_mb))) {
			/*
			 * No limit was found in device tree.
			 */
			max_task_footprint_mb = 0;
		}
	}

	if (max_task_footprint_mb != 0) {
#if CONFIG_MEMORYSTATUS
		/* Clamp the configured limit to a 50 MB floor before using it. */
		if (max_task_footprint_mb < 50) {
			printf("Warning: max_task_pmem %d below minimum.\n",
			    max_task_footprint_mb);
			max_task_footprint_mb = 50;
		}
		printf("Limiting task physical memory footprint to %d MB\n",
		    max_task_footprint_mb);

		max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024;         // Convert MB to bytes

		/*
		 * Configure the per-task memory limit warning level.
		 * This is computed as a percentage.
		 */
		max_task_footprint_warning_level = 0;

		if (max_mem < 0x40000000) {
			/*
			 * On devices with < 1GB of memory:
			 *    -- set warnings to 50MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 50) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
			}
		} else {
			/*
			 * On devices with >= 1GB of memory:
			 *    -- set warnings to 100MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 100) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
			}
		}

		/*
		 * Never allow warning level to land below the default.
		 */
		if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
			max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
		}

		printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);

#else
		/* Footprint limits depend on the memorystatus (jetsam) machinery. */
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
#endif /* CONFIG_MEMORYSTATUS */
	}

#if DEVELOPMENT || DEBUG
	/* EXC_RESOURCE-for-threads and exception-guard defaults are tunable on dev/debug kernels only. */
	if (!PE_parse_boot_argn("exc_resource_threads",
	    &exc_resource_threads_enabled,
	    sizeof(exc_resource_threads_enabled))) {
		exc_resource_threads_enabled = 1;
	}
	PE_parse_boot_argn("task_exc_guard_default",
	    &task_exc_guard_default,
	    sizeof(task_exc_guard_default));
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_COREDUMP
	/* High-watermark user core dumps: disabled unless the boot-arg enables them. */
	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
	    sizeof(hwm_user_cores))) {
		hwm_user_cores = 0;
	}
#endif

	proc_init_cpumon_params();

	/* CPU-wakeups monitor tunables; fall back to compiled-in defaults. */
	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
	    sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
	}

	if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
	    sizeof(disable_exc_resource))) {
		disable_exc_resource = 0;
	}

	/* I/O monitor tunables; fall back to compiled-in defaults. */
	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
		task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
		task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
		io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
	}

/*
 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 * sets up the ledgers for the default coalition. If we don't have coalitions,
 * then we have to call it now.
 */
#if CONFIG_COALITIONS
	assert(task_ledger_template);
#else /* CONFIG_COALITIONS */
	init_task_ledgers();
#endif /* CONFIG_COALITIONS */

	task_ref_init();
	task_zone_init();

#ifdef __LP64__
	boolean_t is_64bit = TRUE;
#else
	boolean_t is_64bit = FALSE;
#endif

	/* kernproc and kernel_task live in a single proc_task_zone element. */
	kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK | Z_ZERO);
	kernel_task = proc_get_task_raw(kernproc);

	/*
	 * Create the kernel task as the first task.
	 */
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE,
	    is_64bit, is_64bit, TF_NONE, TPF_NONE, TWF_NONE, kernel_task) != KERN_SUCCESS) {
		panic("task_init");
	}

	ipc_task_enable(kernel_task);

#if defined(HAS_APPLE_PAC)
	kernel_task->rop_pid = ml_default_rop_pid();
	kernel_task->jop_pid = ml_default_jop_pid();
	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
	ml_task_set_disable_user_jop(kernel_task, FALSE);
#endif

	/*
	 * task_create_internal() gave kernel_task a freshly created map;
	 * drop that reference and install the real kernel_map instead.
	 */
	vm_map_deallocate(kernel_task->map);
	kernel_task->map = kernel_map;
}
1012 
/*
 * task_zone_init:
 *
 * Create the combined proc+task zone.  Each zone element holds a struct
 * proc followed by a struct task; both sizes are rounded up so the task
 * portion starts at task_alignment and the element size stays a multiple
 * of proc_alignment.  Elements are zeroed on free and the zone address
 * range is sequestered (never returned to the VM), which proc_rele()
 * relies on.  Called once from task_init().
 */
static inline void
task_zone_init(void)
{
	proc_struct_size = roundup(proc_struct_size, task_alignment);
	task_struct_size = roundup(sizeof(struct task), proc_alignment);
	proc_and_task_size = proc_struct_size + task_struct_size;

	proc_task_zone = zone_create_ext("proc_task", proc_and_task_size,
	    ZC_ZFREE_CLEARMEM | ZC_SEQUESTER, ZONE_ID_PROC_TASK, NULL); /* sequester is needed for proc_rele() */
}
1023 
1024 /*
1025  * Task ledgers
1026  * ------------
1027  *
1028  * phys_footprint
1029  *   Physical footprint: This is the sum of:
1030  *     + (internal - alternate_accounting)
1031  *     + (internal_compressed - alternate_accounting_compressed)
1032  *     + iokit_mapped
1033  *     + purgeable_nonvolatile
1034  *     + purgeable_nonvolatile_compressed
1035  *     + page_table
1036  *
1037  * internal
1038  *   The task's anonymous memory, which on iOS is always resident.
1039  *
1040  * internal_compressed
1041  *   Amount of this task's internal memory which is held by the compressor.
1042  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1043  *   and could be either decompressed back into memory, or paged out to storage, depending
1044  *   on our implementation.
1045  *
1046  * iokit_mapped
 *   IOKit mappings: The total size of all IOKit mappings in this task [regardless of
 *    clean/dirty or internal/external state].
1049  *
1050  * alternate_accounting
1051  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1052  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1053  *   double counting.
1054  *
1055  * pages_grabbed
1056  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1057  *   which track UPL, IOPL and Kernel page grabs.
1058  */
/*
 * init_task_ledgers:
 *
 * Build the per-task ledger template from which every task's resource
 * accounting ledger is instantiated (see the "Task ledgers" comment above
 * for the meaning of the individual entries).  Must run exactly once,
 * before any task — including kernel_task — is created; called either
 * from coalition_init() or directly from task_init().
 */
void
init_task_ledgers(void)
{
	ledger_template_t t;

	/* One-shot: template must not exist yet and no task may have been created. */
	assert(task_ledger_template == NULL);
	assert(kernel_task == TASK_NULL);

#if MACH_ASSERT
	/* Debug tunables: panic (with optional leeway) when pmap-maintained ledgers go negative. */
	PE_parse_boot_argn("pmap_ledgers_panic",
	    &pmap_ledgers_panic,
	    sizeof(pmap_ledgers_panic));
	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
	    &pmap_ledgers_panic_leeway,
	    sizeof(pmap_ledgers_panic_leeway));
#endif /* MACH_ASSERT */

	if ((t = ledger_template_create("Per-task ledger")) == NULL) {
		panic("couldn't create task ledger template");
	}

	/*
	 * Register every ledger entry.  Each ledger_entry_add*() call returns
	 * the entry's index, or a negative value on failure; failures are
	 * collected and checked in one batch below.
	 */
	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
	    "physmem", "bytes");
	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
	    "bytes");
	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
	    "bytes");
	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
	    "bytes");
	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
	    "bytes");
	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(t, "iokit_mapped", "mappings",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(t, "alternate_accounting", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(t, "alternate_accounting_compressed", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.page_table = ledger_entry_add_with_flags(t, "page_table", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
	    "bytes");
	task_ledgers.reusable = ledger_entry_add(t, "reusable", "physmem", "bytes");
	task_ledgers.external = ledger_entry_add(t, "external", "physmem", "bytes");
	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(t, "purgeable_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(t, "purgeable_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(t, "purgeable_volatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(t, "purgeable_nonvolatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#if DEBUG || DEVELOPMENT
	/* Page-grab counters exist only on debug/development kernels. */
	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#endif
	/* Per-tag (tagged/network/media/graphics/neural) footprint buckets. */
	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(t, "tagged_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(t, "tagged_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(t, "tagged_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(t, "tagged_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile = ledger_entry_add_with_flags(t, "network_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(t, "network_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(t, "network_volatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(t, "network_nonvolatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(t, "media_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint = ledger_entry_add_with_flags(t, "media_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(t, "media_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(t, "media_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(t, "graphics_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(t, "graphics_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(t, "graphics_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(t, "graphics_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(t, "neural_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint = ledger_entry_add_with_flags(t, "neural_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(t, "neural_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(t, "neural_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

#if CONFIG_FREEZE
	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
#endif /* CONFIG_FREEZE */

	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");

#if CONFIG_SCHED_SFI
	/*
	 * SFI wait-time entries: initialize every class to -1 (unregistered),
	 * then register one entry per ledger alias and share its index among
	 * all classes that map to that alias.
	 */
	sfi_class_id_t class_id, ledger_alias;
	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		task_ledgers.sfi_wait_times[class_id] = -1;
	}

	/* don't account for UNSPECIFIED */
	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
			/* Check to see if alias has been registered yet */
			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
			} else {
				/* Otherwise, initialize it first */
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
			}
		} else {
			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
		}

		if (task_ledgers.sfi_wait_times[class_id] < 0) {
			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
		}
	}

	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
#endif /* CONFIG_SCHED_SFI */

	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
	task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
#if CONFIG_PHYS_WRITE_ACCT
	task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
#endif /* CONFIG_PHYS_WRITE_ACCT */
	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");

#if CONFIG_MEMORYSTATUS
	task_ledgers.memorystatus_dirty_time = ledger_entry_add(t, "memorystatus_dirty_time", "physmem", "ns");
#endif /* CONFIG_MEMORYSTATUS */

	task_ledgers.swapins = ledger_entry_add_with_flags(t, "swapins", "physmem", "bytes",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

	/* Batch failure check: any negative index means an entry registration failed. */
	if ((task_ledgers.cpu_time < 0) ||
	    (task_ledgers.tkm_private < 0) ||
	    (task_ledgers.tkm_shared < 0) ||
	    (task_ledgers.phys_mem < 0) ||
	    (task_ledgers.wired_mem < 0) ||
	    (task_ledgers.internal < 0) ||
	    (task_ledgers.external < 0) ||
	    (task_ledgers.reusable < 0) ||
	    (task_ledgers.iokit_mapped < 0) ||
	    (task_ledgers.alternate_accounting < 0) ||
	    (task_ledgers.alternate_accounting_compressed < 0) ||
	    (task_ledgers.page_table < 0) ||
	    (task_ledgers.phys_footprint < 0) ||
	    (task_ledgers.internal_compressed < 0) ||
	    (task_ledgers.purgeable_volatile < 0) ||
	    (task_ledgers.purgeable_nonvolatile < 0) ||
	    (task_ledgers.purgeable_volatile_compressed < 0) ||
	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
	    (task_ledgers.tagged_nofootprint < 0) ||
	    (task_ledgers.tagged_footprint < 0) ||
	    (task_ledgers.tagged_nofootprint_compressed < 0) ||
	    (task_ledgers.tagged_footprint_compressed < 0) ||
#if CONFIG_FREEZE
	    (task_ledgers.frozen_to_swap < 0) ||
#endif /* CONFIG_FREEZE */
	    (task_ledgers.network_volatile < 0) ||
	    (task_ledgers.network_nonvolatile < 0) ||
	    (task_ledgers.network_volatile_compressed < 0) ||
	    (task_ledgers.network_nonvolatile_compressed < 0) ||
	    (task_ledgers.media_nofootprint < 0) ||
	    (task_ledgers.media_footprint < 0) ||
	    (task_ledgers.media_nofootprint_compressed < 0) ||
	    (task_ledgers.media_footprint_compressed < 0) ||
	    (task_ledgers.graphics_nofootprint < 0) ||
	    (task_ledgers.graphics_footprint < 0) ||
	    (task_ledgers.graphics_nofootprint_compressed < 0) ||
	    (task_ledgers.graphics_footprint_compressed < 0) ||
	    (task_ledgers.neural_nofootprint < 0) ||
	    (task_ledgers.neural_footprint < 0) ||
	    (task_ledgers.neural_nofootprint_compressed < 0) ||
	    (task_ledgers.neural_footprint_compressed < 0) ||
	    (task_ledgers.platform_idle_wakeups < 0) ||
	    (task_ledgers.interrupt_wakeups < 0) ||
	    (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
	    (task_ledgers.physical_writes < 0) ||
	    (task_ledgers.logical_writes < 0) ||
	    (task_ledgers.logical_writes_to_external < 0) ||
#if CONFIG_PHYS_WRITE_ACCT
	    (task_ledgers.fs_metadata_writes < 0) ||
#endif /* CONFIG_PHYS_WRITE_ACCT */
#if CONFIG_MEMORYSTATUS
	    (task_ledgers.memorystatus_dirty_time < 0) ||
#endif /* CONFIG_MEMORYSTATUS */
	    (task_ledgers.energy_billed_to_me < 0) ||
	    (task_ledgers.energy_billed_to_others < 0) ||
	    (task_ledgers.swapins < 0)
	    ) {
		panic("couldn't create entries for task ledger template");
	}

	/* These entries only account credits, never debits. */
	ledger_track_credit_only(t, task_ledgers.phys_footprint);
	ledger_track_credit_only(t, task_ledgers.internal);
	ledger_track_credit_only(t, task_ledgers.external);
	ledger_track_credit_only(t, task_ledgers.reusable);

	/* Track high-water marks for these entries (60 refers to the tracking granularity argument). */
	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
	ledger_track_maximum(t, task_ledgers.phys_mem, 60);
	ledger_track_maximum(t, task_ledgers.internal, 60);
	ledger_track_maximum(t, task_ledgers.internal_compressed, 60);
	ledger_track_maximum(t, task_ledgers.reusable, 60);
	ledger_track_maximum(t, task_ledgers.external, 60);
#if MACH_ASSERT
	if (pmap_ledgers_panic) {
		/* Debug mode: a negative balance in any pmap-maintained ledger is fatal. */
		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
		ledger_panic_on_negative(t, task_ledgers.page_table);
		ledger_panic_on_negative(t, task_ledgers.internal);
		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
#if CONFIG_PHYS_WRITE_ACCT
		ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
#endif /* CONFIG_PHYS_WRITE_ACCT */

		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_volatile);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.media_footprint);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
	}
#endif /* MACH_ASSERT */

	/* Wire up limit-exceeded callbacks (footprint, wakeups, physical writes). */
#if CONFIG_MEMORYSTATUS
	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
#endif /* CONFIG_MEMORYSTATUS */

	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
	    task_wakeups_rate_exceeded, NULL, NULL);
	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);

	/* Freeze the template; no entries may be added after completion. */
#if !XNU_MONITOR
	ledger_template_complete(t);
#else /* !XNU_MONITOR */
	ledger_template_complete_secure_alloc(t);
#endif /* XNU_MONITOR */
	task_ledger_template = t;
}
1317 
1318 /* Create a task, but leave the task ports disabled */
1319 kern_return_t
task_create_internal(task_t parent_task,proc_ro_t proc_ro,coalition_t * parent_coalitions __unused,boolean_t inherit_memory,boolean_t is_64bit __unused,boolean_t is_64bit_data,uint32_t t_flags,uint32_t t_procflags,uint8_t t_returnwaitflags,task_t child_task)1320 task_create_internal(
1321 	task_t             parent_task,            /* Null-able */
1322 	proc_ro_t          proc_ro,
1323 	coalition_t        *parent_coalitions __unused,
1324 	boolean_t          inherit_memory,
1325 	boolean_t          is_64bit __unused,
1326 	boolean_t          is_64bit_data,
1327 	uint32_t           t_flags,
1328 	uint32_t           t_procflags,
1329 	uint8_t            t_returnwaitflags,
1330 	task_t             child_task)
1331 {
1332 	task_t                  new_task;
1333 	vm_shared_region_t      shared_region;
1334 	ledger_t                ledger = NULL;
1335 	struct task_ro_data     task_ro_data = {};
1336 	uint32_t                parent_t_flags_ro = 0;
1337 
1338 	new_task = child_task;
1339 
1340 	if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1341 		return KERN_RESOURCE_SHORTAGE;
1342 	}
1343 
1344 	/* allocate with active entries */
1345 	assert(task_ledger_template != NULL);
1346 	ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1347 	if (ledger == NULL) {
1348 		task_ref_count_fini(new_task);
1349 		return KERN_RESOURCE_SHORTAGE;
1350 	}
1351 
1352 	counter_alloc(&(new_task->faults));
1353 
1354 #if defined(HAS_APPLE_PAC)
1355 	ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1356 	ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
1357 	ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1358 #endif
1359 
1360 
1361 	new_task->ledger = ledger;
1362 
1363 	/* if inherit_memory is true, parent_task MUST not be NULL */
1364 	if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1365 #if CONFIG_DEFERRED_RECLAIM
1366 		if (parent_task->deferred_reclamation_metadata) {
1367 			/*
1368 			 * Prevent concurrent reclaims while we're forking the parent_task's map,
1369 			 * so that the child's map is in sync with the forked reclamation
1370 			 * metadata.
1371 			 */
1372 			vm_deferred_reclamation_buffer_lock(parent_task->deferred_reclamation_metadata);
1373 		}
1374 #endif /* CONFIG_DEFERRED_RECLAIM */
1375 		new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1376 #if CONFIG_DEFERRED_RECLAIM
1377 		if (parent_task->deferred_reclamation_metadata) {
1378 			new_task->deferred_reclamation_metadata =
1379 			    vm_deferred_reclamation_buffer_fork(new_task, parent_task->deferred_reclamation_metadata);
1380 		}
1381 #endif /* CONFIG_DEFERRED_RECLAIM */
1382 	} else {
1383 		unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1384 		pmap_t pmap = pmap_create_options(ledger, 0, pmap_flags);
1385 		if (pmap == NULL) {
1386 			counter_free(&new_task->faults);
1387 			ledger_dereference(ledger);
1388 			task_ref_count_fini(new_task);
1389 			return KERN_RESOURCE_SHORTAGE;
1390 		}
1391 		new_task->map = vm_map_create_options(pmap,
1392 		    (vm_map_offset_t)(VM_MIN_ADDRESS),
1393 		    (vm_map_offset_t)(VM_MAX_ADDRESS),
1394 		    VM_MAP_CREATE_PAGEABLE);
1395 	}
1396 
1397 	if (new_task->map == NULL) {
1398 		counter_free(&new_task->faults);
1399 		ledger_dereference(ledger);
1400 		task_ref_count_fini(new_task);
1401 		return KERN_RESOURCE_SHORTAGE;
1402 	}
1403 
1404 #if defined(CONFIG_SCHED_MULTIQ)
1405 	new_task->sched_group = sched_group_create();
1406 #endif
1407 
1408 	/* Inherit address space and memlock limit from parent */
1409 	if (parent_task) {
1410 		vm_map_set_size_limit(new_task->map, parent_task->map->size_limit);
1411 		vm_map_set_data_limit(new_task->map, parent_task->map->data_limit);
1412 		vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1413 	}
1414 
1415 	lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1416 	queue_init(&new_task->threads);
1417 	new_task->suspend_count = 0;
1418 	new_task->thread_count = 0;
1419 	new_task->active_thread_count = 0;
1420 	new_task->user_stop_count = 0;
1421 	new_task->legacy_stop_count = 0;
1422 	new_task->active = TRUE;
1423 	new_task->halting = FALSE;
1424 	new_task->priv_flags = 0;
1425 	new_task->t_flags = t_flags;
1426 	new_task->t_procflags = t_procflags;
1427 	new_task->t_returnwaitflags = t_returnwaitflags;
1428 	new_task->returnwait_inheritor = current_thread();
1429 	new_task->importance = 0;
1430 	new_task->crashed_thread_id = 0;
1431 	new_task->exec_token = 0;
1432 	new_task->watchports = NULL;
1433 	new_task->t_rr_ranges = NULL;
1434 
1435 	new_task->bank_context = NULL;
1436 
1437 	if (parent_task) {
1438 		parent_t_flags_ro = task_ro_flags_get(parent_task);
1439 	}
1440 
1441 #if __has_feature(ptrauth_calls)
1442 	/* Inherit the pac exception flags from parent if in fork */
1443 	if (parent_task && inherit_memory) {
1444 		task_ro_data.t_flags_ro |= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE |
1445 		    TFRO_PAC_EXC_FATAL));
1446 	}
1447 #endif
1448 
1449 #ifdef MACH_BSD
1450 	new_task->corpse_info = NULL;
1451 #endif /* MACH_BSD */
1452 
1453 	/* kern_task not created by this function has unique id 0, start with 1 here. */
1454 	task_set_uniqueid(new_task);
1455 
1456 #if CONFIG_MACF
1457 	set_task_crash_label(new_task, NULL);
1458 
1459 	task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1460 	task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1461 #endif
1462 
1463 #if CONFIG_MEMORYSTATUS
1464 	if (max_task_footprint != 0) {
1465 		ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1466 	}
1467 #endif /* CONFIG_MEMORYSTATUS */
1468 
1469 	if (task_wakeups_monitor_rate != 0) {
1470 		uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1471 		int32_t  rate;        // Ignored because of WAKEMON_SET_DEFAULTS
1472 		task_wakeups_monitor_ctl(new_task, &flags, &rate);
1473 	}
1474 
1475 #if CONFIG_IO_ACCOUNTING
1476 	uint32_t flags = IOMON_ENABLE;
1477 	task_io_monitor_ctl(new_task, &flags);
1478 #endif /* CONFIG_IO_ACCOUNTING */
1479 
1480 	machine_task_init(new_task, parent_task, inherit_memory);
1481 
1482 	new_task->task_debug = NULL;
1483 
1484 #if DEVELOPMENT || DEBUG
1485 	new_task->task_unnested = FALSE;
1486 	new_task->task_disconnected_count = 0;
1487 #endif
1488 	queue_init(&new_task->semaphore_list);
1489 	new_task->semaphores_owned = 0;
1490 
1491 	new_task->vtimers = 0;
1492 
1493 	new_task->shared_region = NULL;
1494 
1495 	new_task->affinity_space = NULL;
1496 
1497 	new_task->t_kpc = 0;
1498 
1499 	new_task->pidsuspended = FALSE;
1500 	new_task->frozen = FALSE;
1501 	new_task->changing_freeze_state = FALSE;
1502 	new_task->rusage_cpu_flags = 0;
1503 	new_task->rusage_cpu_percentage = 0;
1504 	new_task->rusage_cpu_interval = 0;
1505 	new_task->rusage_cpu_deadline = 0;
1506 	new_task->rusage_cpu_callt = NULL;
1507 #if MACH_ASSERT
1508 	new_task->suspends_outstanding = 0;
1509 #endif
1510 	recount_task_init(&new_task->tk_recount);
1511 
1512 #if HYPERVISOR
1513 	new_task->hv_task_target = NULL;
1514 #endif /* HYPERVISOR */
1515 
1516 #if CONFIG_TASKWATCH
1517 	queue_init(&new_task->task_watchers);
1518 	new_task->num_taskwatchers  = 0;
1519 	new_task->watchapplying  = 0;
1520 #endif /* CONFIG_TASKWATCH */
1521 
1522 	new_task->mem_notify_reserved = 0;
1523 	new_task->memlimit_attrs_reserved = 0;
1524 
1525 	new_task->requested_policy = default_task_requested_policy;
1526 	new_task->effective_policy = default_task_effective_policy;
1527 
1528 	new_task->task_shared_region_slide = -1;
1529 
1530 	if (parent_task != NULL) {
1531 		task_ro_data.task_tokens.sec_token = *task_get_sec_token(parent_task);
1532 		task_ro_data.task_tokens.audit_token = *task_get_audit_token(parent_task);
1533 
1534 		/* only inherit the option bits, no effect until task_set_immovable_pinned() */
1535 		task_ro_data.task_control_port_options = task_get_control_port_options(parent_task);
1536 
1537 		task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_FILTER_MSG;
1538 #if CONFIG_MACF
1539 		if (!(t_flags & TF_CORPSE_FORK)) {
1540 			task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(parent_task);
1541 			task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(parent_task);
1542 		}
1543 #endif
1544 	} else {
1545 		task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1546 		task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1547 
1548 		task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1549 	}
1550 
1551 	/* must set before task_importance_init_from_parent: */
1552 	if (proc_ro != NULL) {
1553 		new_task->bsd_info_ro = proc_ro_ref_task(proc_ro, new_task, &task_ro_data);
1554 	} else {
1555 		new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, new_task, &task_ro_data);
1556 	}
1557 
1558 	ipc_task_init(new_task, parent_task);
1559 
1560 	task_importance_init_from_parent(new_task, parent_task);
1561 
1562 	new_task->corpse_vmobject_list = NULL;
1563 
1564 	if (parent_task != TASK_NULL) {
1565 		/* inherit the parent's shared region */
1566 		shared_region = vm_shared_region_get(parent_task);
1567 		if (shared_region != NULL) {
1568 			vm_shared_region_set(new_task, shared_region);
1569 		}
1570 
1571 #if __has_feature(ptrauth_calls)
1572 		/* use parent's shared_region_id */
1573 		char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1574 		if (shared_region_id != NULL) {
1575 			shared_region_key_alloc(shared_region_id, FALSE, 0);         /* get a reference */
1576 		}
1577 		task_set_shared_region_id(new_task, shared_region_id);
1578 #endif /* __has_feature(ptrauth_calls) */
1579 
1580 		if (task_has_64Bit_addr(parent_task)) {
1581 			task_set_64Bit_addr(new_task);
1582 		}
1583 
1584 		if (task_has_64Bit_data(parent_task)) {
1585 			task_set_64Bit_data(new_task);
1586 		}
1587 
1588 		new_task->all_image_info_addr = parent_task->all_image_info_addr;
1589 		new_task->all_image_info_size = parent_task->all_image_info_size;
1590 		new_task->mach_header_vm_address = 0;
1591 
1592 		if (inherit_memory && parent_task->affinity_space) {
1593 			task_affinity_create(parent_task, new_task);
1594 		}
1595 
1596 		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1597 
1598 		new_task->task_exc_guard = parent_task->task_exc_guard;
1599 		if (parent_task->t_flags & TF_NO_SMT) {
1600 			new_task->t_flags |= TF_NO_SMT;
1601 		}
1602 
1603 		if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1604 			new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1605 		}
1606 
1607 		if (parent_task->t_flags & TF_TECS) {
1608 			new_task->t_flags |= TF_TECS;
1609 		}
1610 
1611 #if defined(__x86_64__)
1612 		if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1613 			new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1614 		}
1615 #endif
1616 		new_task->priority = BASEPRI_DEFAULT;
1617 		new_task->max_priority = MAXPRI_USER;
1618 
1619 		task_policy_create(new_task, parent_task);
1620 	} else {
1621 #ifdef __LP64__
1622 		if (is_64bit) {
1623 			task_set_64Bit_addr(new_task);
1624 		}
1625 #endif
1626 
1627 		if (is_64bit_data) {
1628 			task_set_64Bit_data(new_task);
1629 		}
1630 
1631 		new_task->all_image_info_addr = (mach_vm_address_t)0;
1632 		new_task->all_image_info_size = (mach_vm_size_t)0;
1633 
1634 		new_task->pset_hint = PROCESSOR_SET_NULL;
1635 
1636 		new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1637 
1638 		if (new_task == kernel_task) {
1639 			new_task->priority = BASEPRI_KERNEL;
1640 			new_task->max_priority = MAXPRI_KERNEL;
1641 		} else {
1642 			new_task->priority = BASEPRI_DEFAULT;
1643 			new_task->max_priority = MAXPRI_USER;
1644 		}
1645 	}
1646 
1647 	bzero(new_task->coalition, sizeof(new_task->coalition));
1648 	for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1649 		queue_chain_init(new_task->task_coalition[i]);
1650 	}
1651 
1652 	/* Allocate I/O Statistics */
1653 	new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1654 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1655 
1656 	bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1657 	bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1658 
1659 	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1660 
1661 	counter_alloc(&(new_task->pageins));
1662 	counter_alloc(&(new_task->cow_faults));
1663 	counter_alloc(&(new_task->messages_sent));
1664 	counter_alloc(&(new_task->messages_received));
1665 
1666 	/* Copy resource acc. info from Parent for Corpe Forked task. */
1667 	if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1668 		task_rollup_accounting_info(new_task, parent_task);
1669 		task_store_owned_vmobject_info(new_task, parent_task);
1670 	} else {
1671 		/* Initialize to zero for standard fork/spawn case */
1672 		new_task->total_runnable_time = 0;
1673 		new_task->syscalls_mach = 0;
1674 		new_task->syscalls_unix = 0;
1675 		new_task->c_switch = 0;
1676 		new_task->p_switch = 0;
1677 		new_task->ps_switch = 0;
1678 		new_task->decompressions = 0;
1679 		new_task->low_mem_notified_warn = 0;
1680 		new_task->low_mem_notified_critical = 0;
1681 		new_task->purged_memory_warn = 0;
1682 		new_task->purged_memory_critical = 0;
1683 		new_task->low_mem_privileged_listener = 0;
1684 		new_task->memlimit_is_active = 0;
1685 		new_task->memlimit_is_fatal = 0;
1686 		new_task->memlimit_active_exc_resource = 0;
1687 		new_task->memlimit_inactive_exc_resource = 0;
1688 		new_task->task_timer_wakeups_bin_1 = 0;
1689 		new_task->task_timer_wakeups_bin_2 = 0;
1690 		new_task->task_gpu_ns = 0;
1691 		new_task->task_writes_counters_internal.task_immediate_writes = 0;
1692 		new_task->task_writes_counters_internal.task_deferred_writes = 0;
1693 		new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1694 		new_task->task_writes_counters_internal.task_metadata_writes = 0;
1695 		new_task->task_writes_counters_external.task_immediate_writes = 0;
1696 		new_task->task_writes_counters_external.task_deferred_writes = 0;
1697 		new_task->task_writes_counters_external.task_invalidated_writes = 0;
1698 		new_task->task_writes_counters_external.task_metadata_writes = 0;
1699 #if CONFIG_PHYS_WRITE_ACCT
1700 		new_task->task_fs_metadata_writes = 0;
1701 #endif /* CONFIG_PHYS_WRITE_ACCT */
1702 	}
1703 
1704 
1705 	new_task->donates_own_pages = FALSE;
1706 #if CONFIG_COALITIONS
1707 	if (!(t_flags & TF_CORPSE_FORK)) {
1708 		/* TODO: there is no graceful failure path here... */
1709 		if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1710 			coalitions_adopt_task(parent_coalitions, new_task);
1711 			if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1712 				new_task->donates_own_pages = coalition_is_swappable(parent_coalitions[COALITION_TYPE_JETSAM]);
1713 			}
1714 		} else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1715 			/*
1716 			 * all tasks at least have a resource coalition, so
1717 			 * if the parent has one then inherit all coalitions
1718 			 * the parent is a part of
1719 			 */
1720 			coalitions_adopt_task(parent_task->coalition, new_task);
1721 			if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1722 				new_task->donates_own_pages = coalition_is_swappable(parent_task->coalition[COALITION_TYPE_JETSAM]);
1723 			}
1724 		} else {
1725 			/* TODO: assert that new_task will be PID 1 (launchd) */
1726 			coalitions_adopt_init_task(new_task);
1727 		}
1728 		/*
1729 		 * on exec, we need to transfer the coalition roles from the
1730 		 * parent task to the exec copy task.
1731 		 */
1732 		if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1733 			int coal_roles[COALITION_NUM_TYPES];
1734 			task_coalition_roles(parent_task, coal_roles);
1735 			(void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1736 		}
1737 	} else {
1738 		coalitions_adopt_corpse_task(new_task);
1739 	}
1740 
1741 	if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1742 		panic("created task is not a member of a resource coalition");
1743 	}
1744 	task_set_coalition_member(new_task);
1745 #endif /* CONFIG_COALITIONS */
1746 
1747 	new_task->dispatchqueue_offset = 0;
1748 	if (parent_task != NULL) {
1749 		new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1750 	}
1751 
1752 	new_task->task_can_transfer_memory_ownership = FALSE;
1753 	new_task->task_volatile_objects = 0;
1754 	new_task->task_nonvolatile_objects = 0;
1755 	new_task->task_objects_disowning = FALSE;
1756 	new_task->task_objects_disowned = FALSE;
1757 	new_task->task_owned_objects = 0;
1758 	queue_init(&new_task->task_objq);
1759 
1760 #if CONFIG_FREEZE
1761 	queue_init(&new_task->task_frozen_cseg_q);
1762 #endif /* CONFIG_FREEZE */
1763 
1764 	task_objq_lock_init(new_task);
1765 
1766 #if __arm64__
1767 	new_task->task_legacy_footprint = FALSE;
1768 	new_task->task_extra_footprint_limit = FALSE;
1769 	new_task->task_ios13extended_footprint_limit = FALSE;
1770 #endif /* __arm64__ */
1771 	new_task->task_region_footprint = FALSE;
1772 	new_task->task_has_crossed_thread_limit = FALSE;
1773 	new_task->task_thread_limit = 0;
1774 #if CONFIG_SECLUDED_MEMORY
1775 	new_task->task_can_use_secluded_mem = FALSE;
1776 	new_task->task_could_use_secluded_mem = FALSE;
1777 	new_task->task_could_also_use_secluded_mem = FALSE;
1778 	new_task->task_suppressed_secluded = FALSE;
1779 #endif /* CONFIG_SECLUDED_MEMORY */
1780 
1781 	/*
1782 	 * t_flags is set up above. But since we don't
1783 	 * support darkwake mode being set that way
1784 	 * currently, we clear it out here explicitly.
1785 	 */
1786 	new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1787 
1788 	queue_init(&new_task->io_user_clients);
1789 	new_task->loadTag = 0;
1790 
1791 	lck_mtx_lock(&tasks_threads_lock);
1792 	queue_enter(&tasks, new_task, task_t, tasks);
1793 	tasks_count++;
1794 	if (tasks_suspend_state) {
1795 		task_suspend_internal(new_task);
1796 	}
1797 	lck_mtx_unlock(&tasks_threads_lock);
1798 	task_ref_hold_proc_task_struct(new_task);
1799 
1800 	return KERN_SUCCESS;
1801 }
1802 
/*
 *	task_rollup_accounting_info
 *
 *	Roll up accounting stats from from_task into to_task. Used to
 *	carry accumulated resource accounting over to an exec copy task
 *	and to a corpse fork, so the new task reports the usage of the
 *	task it was created from.
 *
 *	Counter entries are added into the destination; plain scalar
 *	stats are copied wholesale.  Memory-accounting ledger entries
 *	are deliberately not rolled up (see comment below).
 */
void
task_rollup_accounting_info(task_t to_task, task_t from_task)
{
	assert(from_task != to_task);

	/* CPU time/energy accounting maintained by the recount subsystem. */
	recount_task_copy(&to_task->tk_recount, &from_task->tk_recount);
	to_task->total_runnable_time = from_task->total_runnable_time;
	/* Fault and IPC-message counters accumulate into the destination. */
	counter_add(&to_task->faults, counter_load(&from_task->faults));
	counter_add(&to_task->pageins, counter_load(&from_task->pageins));
	counter_add(&to_task->cow_faults, counter_load(&from_task->cow_faults));
	counter_add(&to_task->messages_sent, counter_load(&from_task->messages_sent));
	counter_add(&to_task->messages_received, counter_load(&from_task->messages_received));
	/* Scheduler, syscall and memory-pressure scalar stats. */
	to_task->decompressions = from_task->decompressions;
	to_task->syscalls_mach = from_task->syscalls_mach;
	to_task->syscalls_unix = from_task->syscalls_unix;
	to_task->c_switch = from_task->c_switch;
	to_task->p_switch = from_task->p_switch;
	to_task->ps_switch = from_task->ps_switch;
	to_task->extmod_statistics = from_task->extmod_statistics;
	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
	to_task->purged_memory_warn = from_task->purged_memory_warn;
	to_task->purged_memory_critical = from_task->purged_memory_critical;
	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
	/* Structure copies (task_io_stats is a separately allocated struct). */
	*to_task->task_io_stats = *from_task->task_io_stats;
	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
	to_task->task_gpu_ns = from_task->task_gpu_ns;
	/* Internal/external write counters, copied field by field. */
	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
#if CONFIG_PHYS_WRITE_ACCT
	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */

#if CONFIG_MEMORYSTATUS
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.memorystatus_dirty_time);
#endif /* CONFIG_MEMORYSTATUS */

	/* Skip ledger roll up for memory accounting entries */
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
#if CONFIG_SCHED_SFI
	/* Roll up per-SFI-class wait times. */
	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
	}
#endif
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
}
1871 
/*
 *	task_deallocate_internal:
 *
 *	Drop a reference on a task; tears the task down once the last
 *	reference is gone.  Don't call this directly — it is reached
 *	from the task_deallocate() family, which passes `refs`, the
 *	number of references remaining after the drop.
 */
extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
void
task_deallocate_internal(
	task_t          task,
	os_ref_count_t  refs)
{
	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;

	if (task == TASK_NULL) {
		return;
	}

#if IMPORTANCE_INHERITANCE
	if (refs == 1) {
		/*
		 * If last ref potentially comes from the task's importance,
		 * disconnect it.  But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
		 */
		if (IIT_NULL != task->task_imp_base) {
			ipc_importance_disconnect_task(task);
		}
		return;
	}
#endif /* IMPORTANCE_INHERITANCE */

	/* References remain: nothing to tear down yet. */
	if (refs > 0) {
		return;
	}

	/*
	 * The task should be dead at this point. Ensure other resources
	 * like threads, are gone before we trash the world.
	 */
	assert(queue_empty(&task->threads));
	assert(get_bsdtask_info(task) == NULL);
	assert(!is_active(task->itk_space));
	assert(!task->active);
	assert(task->active_thread_count == 0);

	/* Unlink the task from the global terminated-tasks list. */
	lck_mtx_lock(&tasks_threads_lock);
	assert(terminated_tasks_count > 0);
	queue_remove(&terminated_tasks, task, task_t, tasks);
	terminated_tasks_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * remove the reference on bank context
	 */
	task_bank_reset(task);

	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup before ripping apart
	 *	the task.
	 */
	machine_task_terminate(task);

	ipc_task_terminate(task);

	/* let iokit know */
	iokit_task_terminate(task);

	/* Unregister task from userspace coredumps on panic */
	kern_unregister_userspace_coredump(task);

	if (task->affinity_space) {
		task_affinity_deallocate(task);
	}

#if MACH_ASSERT
	/* The task and its pmap are expected to share the same ledger. */
	if (task->ledger != NULL &&
	    task->map != NULL &&
	    task->map->pmap != NULL &&
	    task->map->pmap->ledger != NULL) {
		assert(task->ledger == task->map->pmap->ledger);
	}
#endif /* MACH_ASSERT */

	/* Disown any VM objects the task still owns before dropping its map. */
	vm_owned_objects_disown(task);
	assert(task->task_objects_disowned);
	if (task->task_owned_objects != 0) {
		panic("task_deallocate(%p): "
		    "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
		    task,
		    task->task_volatile_objects,
		    task->task_nonvolatile_objects,
		    task->task_owned_objects);
	}

#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_deallocate(task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	vm_map_deallocate(task->map);
	if (task->is_large_corpse) {
		assert(large_corpse_count > 0);
		OSDecrementAtomic(&large_corpse_count);
		task->is_large_corpse = false;
	}
	is_release(task->itk_space);
	if (task->t_rr_ranges) {
		restartable_ranges_release(task->t_rr_ranges);
	}

	/* Snapshot wakeup ledger entries before the ledger is dropped below. */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    &interrupt_wakeups, &debit);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    &platform_idle_wakeups, &debit);

#if defined(CONFIG_SCHED_MULTIQ)
	sched_group_destroy(task->sched_group);
#endif

	/* Pull final CPU time (and energy) totals out of recount, then deinit it. */
	struct recount_times_mach sum = { 0 };
	struct recount_times_mach p_only = { 0 };
	recount_task_times_perf_only(task, &sum, &p_only);
#if CONFIG_PERVASIVE_ENERGY
	uint64_t energy = recount_task_energy_nj(task);
#endif /* CONFIG_PERVASIVE_ENERGY */
	recount_task_deinit(&task->tk_recount);

	/* Accumulate statistics for dead tasks */
	lck_spin_lock(&dead_task_statistics_lock);
	dead_task_statistics.total_user_time += sum.rtm_user;
	dead_task_statistics.total_system_time += sum.rtm_system;

	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;

	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
	dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
	dead_task_statistics.total_pset_switches += task->ps_switch;
	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
#if CONFIG_PERVASIVE_ENERGY
	dead_task_statistics.task_energy += energy;
#endif /* CONFIG_PERVASIVE_ENERGY */

	lck_spin_unlock(&dead_task_statistics_lock);
	lck_mtx_destroy(&task->lock, &task_lck_grp);

	/* Fold this task's kernel-memory ledger totals into the global stats. */
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
	}
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
	}
	ledger_dereference(task->ledger);

	counter_free(&task->faults);
	counter_free(&task->pageins);
	counter_free(&task->cow_faults);
	counter_free(&task->messages_sent);
	counter_free(&task->messages_received);

#if CONFIG_COALITIONS
	task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */

	bzero(task->coalition, sizeof(task->coalition));

#if MACH_BSD
	/* clean up collected information since last reference to task is gone */
	if (task->corpse_info) {
		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
		task_crashinfo_destroy(task->corpse_info);
		task->corpse_info = NULL;
		kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
	}
#endif

#if CONFIG_MACF
	/* Release the MAC crash label, if one was attached. */
	if (get_task_crash_label(task)) {
		mac_exc_free_label(get_task_crash_label(task));
		set_task_crash_label(task, NULL);
	}
#endif

	assert(queue_empty(&task->task_objq));
	task_objq_lock_destroy(task);

	if (task->corpse_vmobject_list) {
		kfree_data(task->corpse_vmobject_list,
		    (vm_size_t)task->corpse_vmobject_list_size);
	}

	task_ref_count_fini(task);

	task->bsd_info_ro = proc_ro_release_task((proc_ro_t)task->bsd_info_ro);

	/*
	 * NOTE(review): proc_ro_release_task() appears to return a proc_ro
	 * that is still owned by us and must be freed here — confirm the
	 * ownership contract against proc_ro's documentation.
	 */
	if (task->bsd_info_ro != NULL) {
		proc_ro_free(task->bsd_info_ro);
		task->bsd_info_ro = NULL;
	}

	task_release_proc_task_struct(task);
}
2087 
2088 /*
2089  *	task_name_deallocate_mig:
2090  *
2091  *	Drop a reference on a task name.
2092  */
2093 void
task_name_deallocate_mig(task_name_t task_name)2094 task_name_deallocate_mig(
2095 	task_name_t             task_name)
2096 {
2097 	return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2098 }
2099 
2100 /*
2101  *	task_policy_set_deallocate_mig:
2102  *
2103  *	Drop a reference on a task type.
2104  */
2105 void
task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)2106 task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2107 {
2108 	return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2109 }
2110 
2111 /*
2112  *	task_policy_get_deallocate_mig:
2113  *
2114  *	Drop a reference on a task type.
2115  */
2116 void
task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)2117 task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2118 {
2119 	return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2120 }
2121 
2122 /*
2123  *	task_inspect_deallocate_mig:
2124  *
2125  *	Drop a task inspection reference.
2126  */
2127 void
task_inspect_deallocate_mig(task_inspect_t task_inspect)2128 task_inspect_deallocate_mig(
2129 	task_inspect_t          task_inspect)
2130 {
2131 	return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2132 }
2133 
2134 /*
2135  *	task_read_deallocate_mig:
2136  *
2137  *	Drop a reference on task read port.
2138  */
2139 void
task_read_deallocate_mig(task_read_t task_read)2140 task_read_deallocate_mig(
2141 	task_read_t          task_read)
2142 {
2143 	return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2144 }
2145 
2146 /*
2147  *	task_suspension_token_deallocate:
2148  *
2149  *	Drop a reference on a task suspension token.
2150  */
2151 void
task_suspension_token_deallocate(task_suspension_token_t token)2152 task_suspension_token_deallocate(
2153 	task_suspension_token_t         token)
2154 {
2155 	return task_deallocate((task_t)token);
2156 }
2157 
2158 void
task_suspension_token_deallocate_grp(task_suspension_token_t token,task_grp_t grp)2159 task_suspension_token_deallocate_grp(
2160 	task_suspension_token_t         token,
2161 	task_grp_t                      grp)
2162 {
2163 	return task_deallocate_grp((task_t)token, grp);
2164 }
2165 
2166 /*
2167  * task_collect_crash_info:
2168  *
2169  * collect crash info from bsd and mach based data
2170  */
2171 kern_return_t
task_collect_crash_info(task_t task,struct label * crash_label,int is_corpse_fork)2172 task_collect_crash_info(
2173 	task_t task,
2174 #ifdef CONFIG_MACF
2175 	struct label *crash_label,
2176 #endif
2177 	int is_corpse_fork)
2178 {
2179 	kern_return_t kr = KERN_SUCCESS;
2180 
2181 	kcdata_descriptor_t crash_data = NULL;
2182 	kcdata_descriptor_t crash_data_release = NULL;
2183 	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2184 	mach_vm_offset_t crash_data_ptr = 0;
2185 	void *crash_data_kernel = NULL;
2186 	void *crash_data_kernel_release = NULL;
2187 #if CONFIG_MACF
2188 	struct label *label, *free_label;
2189 #endif
2190 
2191 	if (!corpses_enabled()) {
2192 		return KERN_NOT_SUPPORTED;
2193 	}
2194 
2195 #if CONFIG_MACF
2196 	free_label = label = mac_exc_create_label(NULL);
2197 #endif
2198 
2199 	task_lock(task);
2200 
2201 	assert(is_corpse_fork || get_bsdtask_info(task) != NULL);
2202 	if (task->corpse_info == NULL && (is_corpse_fork || get_bsdtask_info(task) != NULL)) {
2203 #if CONFIG_MACF
2204 		/* Set the crash label, used by the exception delivery mac hook */
2205 		free_label = get_task_crash_label(task);         // Most likely NULL.
2206 		set_task_crash_label(task, label);
2207 		mac_exc_update_task_crash_label(task, crash_label);
2208 #endif
2209 		task_unlock(task);
2210 
2211 		crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2212 		    Z_WAITOK | Z_ZERO);
2213 		if (crash_data_kernel == NULL) {
2214 			kr = KERN_RESOURCE_SHORTAGE;
2215 			goto out_no_lock;
2216 		}
2217 		crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2218 
2219 		/* Do not get a corpse ref for corpse fork */
2220 		crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2221 		    is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2222 		    KCFLAG_USE_MEMCOPY);
2223 		if (crash_data) {
2224 			task_lock(task);
2225 			crash_data_release = task->corpse_info;
2226 			crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2227 			task->corpse_info = crash_data;
2228 
2229 			task_unlock(task);
2230 			kr = KERN_SUCCESS;
2231 		} else {
2232 			kfree_data(crash_data_kernel,
2233 			    CORPSEINFO_ALLOCATION_SIZE);
2234 			kr = KERN_FAILURE;
2235 		}
2236 
2237 		if (crash_data_release != NULL) {
2238 			task_crashinfo_destroy(crash_data_release);
2239 		}
2240 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2241 	} else {
2242 		task_unlock(task);
2243 	}
2244 
2245 out_no_lock:
2246 #if CONFIG_MACF
2247 	if (free_label != NULL) {
2248 		mac_exc_free_label(free_label);
2249 	}
2250 #endif
2251 	return kr;
2252 }
2253 
/*
 * task_deliver_crash_notification:
 *
 * Makes outcall to registered host port for a corpse: resets the IPC
 * state of the corpse's threads, arms a no-senders notification on the
 * corpse's task port, and delivers EXC_CORPSE_NOTIFY via the exception
 * triage path.  Returns KERN_FAILURE if no crash info was collected,
 * otherwise the result of the exception delivery.
 */
kern_return_t
task_deliver_crash_notification(
	task_t corpse, /* corpse or corpse fork */
	thread_t thread,
	exception_type_t etype,
	mach_exception_subcode_t subcode)
{
	kcdata_descriptor_t crash_info = corpse->corpse_info;
	thread_t th_iter = NULL;
	kern_return_t kr = KERN_SUCCESS;
	wait_interrupt_t wsave;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	ipc_port_t corpse_port;

	/* No collected crash info means there is nothing to report. */
	if (crash_info == NULL) {
		return KERN_FAILURE;
	}

	assert(task_is_a_corpse(corpse));

	task_lock(corpse);

	/*
	 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
	 * Crash reporters should derive whether it's fatal from corpse blob.
	 */
	code[0] = etype;
	code[1] = subcode;

	/* Reset IPC state of every thread that is not a corpse duplicate. */
	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
	{
		if (th_iter->corpse_dup == FALSE) {
			ipc_thread_reset(th_iter);
		}
	}
	task_unlock(corpse);

	/* Arm the no-sender notification for taskport */
	task_reference(corpse);
	corpse_port = convert_corpse_to_port_and_nsrequest(corpse);

	/* Deliver the exception uninterruptibly. */
	wsave = thread_interrupt_level(THREAD_UNINT);
	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
	if (kr != KERN_SUCCESS) {
		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(corpse));
	}

	(void)thread_interrupt_level(wsave);

	/*
	 * Drop the send right on corpse port, will fire the
	 * no-sender notification if exception deliver failed.
	 */
	ipc_port_release_send(corpse_port);
	return kr;
}
2315 
2316 /*
2317  *	task_terminate:
2318  *
2319  *	Terminate the specified task.  See comments on thread_terminate
2320  *	(kern/thread.c) about problems with terminating the "current task."
2321  */
2322 
2323 kern_return_t
task_terminate(task_t task)2324 task_terminate(
2325 	task_t          task)
2326 {
2327 	if (task == TASK_NULL) {
2328 		return KERN_INVALID_ARGUMENT;
2329 	}
2330 
2331 	if (get_bsdtask_info(task)) {
2332 		return KERN_FAILURE;
2333 	}
2334 
2335 	return task_terminate_internal(task);
2336 }
2337 
2338 #if MACH_ASSERT
2339 extern int proc_pid(struct proc *);
2340 extern void proc_name_kdp(struct proc *p, char *buf, int size);
2341 #endif /* MACH_ASSERT */
2342 
2343 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
2344 static void
task_partial_reap(task_t task,__unused int pid)2345 __unused task_partial_reap(task_t task, __unused int pid)
2346 {
2347 	unsigned int    reclaimed_resident = 0;
2348 	unsigned int    reclaimed_compressed = 0;
2349 	uint64_t        task_page_count;
2350 
2351 	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2352 
2353 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2354 	    pid, task_page_count, 0, 0, 0);
2355 
2356 	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2357 
2358 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2359 	    pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2360 }
2361 
2362 /*
2363  * task_mark_corpse:
2364  *
2365  * Mark the task as a corpse. Called by crashing thread.
2366  */
2367 kern_return_t
task_mark_corpse(task_t task)2368 task_mark_corpse(task_t task)
2369 {
2370 	kern_return_t kr = KERN_SUCCESS;
2371 	thread_t self_thread;
2372 	(void) self_thread;
2373 	wait_interrupt_t wsave;
2374 #if CONFIG_MACF
2375 	struct label *crash_label = NULL;
2376 #endif
2377 
2378 	assert(task != kernel_task);
2379 	assert(task == current_task());
2380 	assert(!task_is_a_corpse(task));
2381 
2382 #if CONFIG_MACF
2383 	crash_label = mac_exc_create_label_for_proc((struct proc*)get_bsdtask_info(task));
2384 #endif
2385 
2386 	kr = task_collect_crash_info(task,
2387 #if CONFIG_MACF
2388 	    crash_label,
2389 #endif
2390 	    FALSE);
2391 	if (kr != KERN_SUCCESS) {
2392 		goto out;
2393 	}
2394 
2395 	self_thread = current_thread();
2396 
2397 	wsave = thread_interrupt_level(THREAD_UNINT);
2398 	task_lock(task);
2399 
2400 	/*
2401 	 * Check if any other thread called task_terminate_internal
2402 	 * and made the task inactive before we could mark it for
2403 	 * corpse pending report. Bail out if the task is inactive.
2404 	 */
2405 	if (!task->active) {
2406 		kcdata_descriptor_t crash_data_release = task->corpse_info;;
2407 		void *crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);;
2408 
2409 		task->corpse_info = NULL;
2410 		task_unlock(task);
2411 
2412 		if (crash_data_release != NULL) {
2413 			task_crashinfo_destroy(crash_data_release);
2414 		}
2415 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2416 		return KERN_TERMINATED;
2417 	}
2418 
2419 	task_set_corpse_pending_report(task);
2420 	task_set_corpse(task);
2421 	task->crashed_thread_id = thread_tid(self_thread);
2422 
2423 	kr = task_start_halt_locked(task, TRUE);
2424 	assert(kr == KERN_SUCCESS);
2425 
2426 	task_set_uniqueid(task);
2427 
2428 	task_unlock(task);
2429 
2430 	/*
2431 	 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2432 	 * disable old ports here instead.
2433 	 *
2434 	 * The vm_map and ipc_space must exist until this function returns,
2435 	 * convert_port_to_{map,space}_with_flavor relies on this behavior.
2436 	 */
2437 	ipc_task_disable(task);
2438 
2439 	/* terminate the ipc space */
2440 	ipc_space_terminate(task->itk_space);
2441 
2442 	/* Add it to global corpse task list */
2443 	task_add_to_corpse_task_list(task);
2444 
2445 	thread_terminate_internal(self_thread);
2446 
2447 	(void) thread_interrupt_level(wsave);
2448 	assert(task->halting == TRUE);
2449 
2450 out:
2451 #if CONFIG_MACF
2452 	mac_exc_free_label(crash_label);
2453 #endif
2454 	return kr;
2455 }
2456 
/*
 *	task_set_uniqueid
 *
 *	Set task uniqueid to systemwide unique 64 bit value
 */
void
task_set_uniqueid(task_t task)
{
	/* Atomic increment of the global counter yields a unique id. */
	task->task_uniqueid = OSIncrementAtomic64(&next_taskuniqueid);
}
2467 
/*
 *	task_clear_corpse
 *
 *	Clears the corpse pending bit on task.
 *	Removes inspection bit on the threads.
 *	Called with the task unlocked; takes and drops the task lock.
 */
void
task_clear_corpse(task_t task)
{
	thread_t th_iter = NULL;

	task_lock(task);
	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
	{
		/*
		 * Drop the per-thread inspection flag and disable the
		 * thread's IPC so no further inspection rights can be
		 * handed out.
		 */
		thread_mtx_lock(th_iter);
		th_iter->inspection = FALSE;
		ipc_thread_disable(th_iter);
		thread_mtx_unlock(th_iter);
	}

	/* Reap crashed threads that are queued for termination. */
	thread_terminate_crashed_threads();
	/* remove the pending corpse report flag */
	task_clear_corpse_pending_report(task);

	task_unlock(task);
}
2494 
/*
 *	task_port_no_senders
 *
 *	Called whenever the Mach port system detects no-senders on
 *	the task port of a corpse.
 *	Each notification that comes in should terminate the task (corpse).
 */
static void
task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	/* Resolve the kobject; must be a task control port for a corpse. */
	task_t task = ipc_kobject_get_locked(port, IKOT_TASK_CONTROL);

	assert(task != TASK_NULL);
	assert(task_is_a_corpse(task));

	/* Remove the task from global corpse task list */
	task_remove_from_corpse_task_list(task);

	/* Undo the corpse markings, then tear the task down for real. */
	task_clear_corpse(task);
	task_terminate_internal(task);
}
2516 
/*
 *	task_port_with_flavor_no_senders
 *
 *	Called whenever the Mach port system detects no-senders on
 *	the task inspect or read port. These ports are allocated lazily and
 *	should be deallocated here when there are no senders remaining.
 *
 *	Lock ordering: itk_lock(task) is taken before ip_mq_lock(port)
 *	when both are held.
 */
static void
task_port_with_flavor_no_senders(
	ipc_port_t          port,
	mach_port_mscount_t mscount __unused)
{
	task_t task;
	mach_task_flavor_t flavor;
	ipc_kobject_type_t kotype;

	/* First, cheap check for a stale notification under the port lock. */
	ip_mq_lock(port);
	if (port->ip_srights > 0) {
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
	task = ipc_kobject_get_locked(port, kotype);
	if (task != TASK_NULL) {
		/* Keep the task alive across the unlocked window below. */
		task_reference(task);
	}
	ip_mq_unlock(port);

	if (task == TASK_NULL) {
		/* The task is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	/* Map the kobject type back to the task port flavor slot. */
	if (kotype == IKOT_TASK_READ) {
		flavor = TASK_FLAVOR_READ;
	} else {
		flavor = TASK_FLAVOR_INSPECT;
	}

	itk_lock(task);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_task_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	if (!ip_active(port) ||
	    task->itk_task_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		itk_unlock(task);
		task_deallocate(task);
		return;
	}

	/* We won the race: clear the slot and destroy the port. */
	assert(task->itk_task_ports[flavor] == port);
	task->itk_task_ports[flavor] = IP_NULL;
	itk_unlock(task);

	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	task_deallocate(task);
}
2590 
/*
 *	task_wait_till_threads_terminate_locked
 *
 *	Wait till all the threads in the task are terminated.
 *	Might release the task lock and re-acquire it.
 *	Called and returns with the task locked.
 */
void
task_wait_till_threads_terminate_locked(task_t task)
{
	/* wait for all the threads in the task to terminate */
	while (task->active_thread_count != 0) {
		/*
		 * Queue the wait before dropping the task lock so a
		 * wakeup on active_thread_count cannot be missed.
		 */
		assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
}
2609 
/*
 *	task_duplicate_map_and_threads
 *
 *	Copy vmmap of source task.
 *	Copy active threads from source task to destination task.
 *	Source task would be suspended during the copy.
 *
 *	On KERN_SUCCESS the outputs are:
 *	  thread_ret   - duplicated thread matching the caller's thread
 *	                 (or the first duplicated thread if no match);
 *	                 caller owns a reference.
 *	  udata_buffer - kalloc_data'd array of kqueue udata pointers
 *	                 (may be NULL); ownership passes to the caller.
 *	  size         - size in bytes of udata_buffer.
 *	  num_udata    - number of valid entries in udata_buffer.
 */
kern_return_t
task_duplicate_map_and_threads(
	task_t task,
	void *p,
	task_t new_task,
	thread_t *thread_ret,
	uint64_t **udata_buffer,
	int *size,
	int *num_udata,
	bool for_exception)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state, use the internal
	 * variant so that no user-space process can resume
	 * the task from under us
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	bool is_large = false;
	if (memorystatus_allowed_vm_map_fork(task, &is_large)) {
		/* Setup new task's vmmap, switch from parent task's map to it COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		if (new_task->map) {
			new_task->is_large_corpse = is_large;
			vm_map_deallocate(oldmap);

			/* copy ledgers that impact the memory footprint */
			vm_map_copy_footprint_ledgers(task, new_task);

			/* Get all the udata pointers from kqueue */
			est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
			if (est_knotes > 0) {
				/* Pad the estimate; knotes may be added concurrently. */
				buf_size = (est_knotes + 32) * sizeof(uint64_t);
				/* NOTE(review): buffer unchecked for NULL before reuse below — TODO confirm kalloc_data(Z_WAITOK) cannot fail here. */
				buffer = kalloc_data(buf_size, Z_WAITOK);
				num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
				if (num_knotes > est_knotes + 32) {
					num_knotes = est_knotes + 32;
				}
			}
		} else {
			/* vm_map_fork failed: roll back large-corpse accounting. */
			if (is_large) {
				assert(large_corpse_count > 0);
				OSDecrementAtomic(&large_corpse_count);
			}
			new_task->map = oldmap;
#if DEVELOPMENT || DEBUG
			memorystatus_abort_vm_map_fork(task);
#endif
			task_resume_internal(task);
			return KERN_NO_SPACE;
		}
	} else if (!for_exception) {
		/* VM vetoed the fork and the caller cannot proceed without a map. */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_NO_SPACE;
	}

	/* Snapshot of the count; the queue walk below is bounded by it. */
	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		kfree_data(buffer, buf_size);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		if (array_count >= active_thread_count) {
			break;
		}

		/* Take a ref so the thread survives after the lock is dropped. */
		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	for (i = 0; i < array_count; i++) {
		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of current thread in corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			/* Mark the copy as a failed dup so consumers can skip it. */
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(get_bsdthread_info(new_thread),
		    get_bsdthread_info(thread_array[i]));
		new_thread->thread_tag = thread_array[i]->thread_tag &
		    ~THREAD_TAG_USER_JOIN;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	/* Drop the refs taken while walking the source thread list. */
	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree_type(thread_t, active_thread_count, thread_array);

	if (kr == KERN_SUCCESS) {
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		/* Failure: release the result thread and the udata buffer. */
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		kfree_data(buffer, buf_size);
	}

	return kr;
}
2803 
2804 #if CONFIG_SECLUDED_MEMORY
2805 extern void task_set_can_use_secluded_mem_locked(
2806 	task_t          task,
2807 	boolean_t       can_use_secluded_mem);
2808 #endif /* CONFIG_SECLUDED_MEMORY */
2809 
2810 #if MACH_ASSERT
2811 int debug4k_panic_on_terminate = 0;
2812 #endif /* MACH_ASSERT */
/*
 *	task_terminate_internal:
 *
 *	Mark the task inactive, terminate all of its threads, and reap
 *	its VM map, IPC space and synchronizers.  Drops the task's own
 *	active reference on itself before returning.
 *
 *	Returns KERN_FAILURE if the task is already terminating or is
 *	marked for corpse reporting, KERN_SUCCESS otherwise.
 */
kern_return_t
task_terminate_internal(
	task_t                  task)
{
	thread_t                        thread, self;
	task_t                          self_task;
	boolean_t                       interrupt_save;
	int                             pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = current_task();

	/*
	 *	Get the task locked and make sure that we are not racing
	 *	with someone else trying to terminate us.
	 *	When two distinct tasks are involved, lock them in pointer
	 *	order to avoid deadlock against a concurrent terminator.
	 */
	if (task == self_task) {
		task_lock(task);
	} else if (task < self_task) {
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	/* Revoke any secluded-memory entitlements before teardown. */
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 *	Task is already being terminated.
		 *	Just return an error. If we are dying, this will
		 *	just get us to our AST special handler and that
		 *	will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (task_corpse_pending_report(task)) {
		/*
		 *	Task is marked for reporting as corpse.
		 *	Just return an error. This will
		 *	just get us to our AST special handler and that
		 *	will get us to finish the path to death
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (self_task != task) {
		task_unlock(self_task);
	}

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 *	Indicate that we want all the threads to stop executing
	 *	at user space by holding the task (we would have held
	 *	each thread independently in thread_terminate_internal -
	 *	but this way we may be more likely to already find it
	 *	held there).  Mark the task inactive, and prevent
	 *	further task operations via the task port.
	 *
	 *	The vm_map and ipc_space must exist until this function returns,
	 *	convert_port_to_{map,space}_with_flavor relies on this behavior.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 *	Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	void *bsd_info = get_bsdtask_info(task);
	if (bsd_info != NULL) {
		pid = proc_pid(bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

// PR-17045188: Revisit implementation
//        task_partial_reap(task, pid);

#if CONFIG_TASKWATCH
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_TASKWATCH */

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Clear the watchport boost on the task.
	 */
	task_remove_turnstile_watchports(task);

	/*
	 *	Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped.  To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explictly here.
	 */

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	void *proc = get_bsdtask_info(task);
	if (proc) {
		pid = proc_pid(proc);
		proc_name_kdp(proc, procname, sizeof(procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof(procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
		DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
		if (debug4k_panic_on_terminate) {
			panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
		}
	}
#endif /* MACH_ASSERT */

	vm_map_terminate(task->map);

	/* release our shared region */
	vm_shared_region_set(task, NULL);

#if __has_feature(ptrauth_calls)
	task_set_shared_region_id(task, NULL);
#endif /* __has_feature(ptrauth_calls) */

	/* Move the task from the live-tasks queue to the terminated queue. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if KPC
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
		kpc_force_all_ctrs(task, 0);
	}
#endif /* KPC */

#if CONFIG_COALITIONS
	/*
	 * Leave the coalition for corpse task or task that
	 * never had any active threads (e.g. fork, exec failure).
	 * For task with active threads, the task will be removed
	 * from coalition by last terminating thread.
	 */
	if (task->active_thread_count == 0) {
		coalitions_remove_task(task);
	}
#endif

#if CONFIG_FREEZE
	extern int      vm_compressor_available;
	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
		task_disown_frozen_csegs(task);
		assert(queue_empty(&task->task_frozen_cseg_q));
	}
#endif /* CONFIG_FREEZE */


	/*
	 * Get rid of the task active reference on itself.
	 */
	task_deallocate_grp(task, TASK_GRP_INTERNAL);

	return KERN_SUCCESS;
}
3065 
3066 void
tasks_system_suspend(boolean_t suspend)3067 tasks_system_suspend(boolean_t suspend)
3068 {
3069 	task_t task;
3070 
3071 	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
3072 	    (suspend ? DBG_FUNC_START : DBG_FUNC_END));
3073 
3074 	lck_mtx_lock(&tasks_threads_lock);
3075 	assert(tasks_suspend_state != suspend);
3076 	tasks_suspend_state = suspend;
3077 	queue_iterate(&tasks, task, task_t, tasks) {
3078 		if (task == kernel_task) {
3079 			continue;
3080 		}
3081 		suspend ? task_suspend_internal(task) : task_resume_internal(task);
3082 	}
3083 	lck_mtx_unlock(&tasks_threads_lock);
3084 }
3085 
3086 /*
3087  * task_start_halt:
3088  *
3089  *      Shut the current task down (except for the current thread) in
3090  *	preparation for dramatic changes to the task (probably exec).
3091  *	We hold the task and mark all other threads in the task for
3092  *	termination.
3093  */
3094 kern_return_t
task_start_halt(task_t task)3095 task_start_halt(task_t task)
3096 {
3097 	kern_return_t kr = KERN_SUCCESS;
3098 	task_lock(task);
3099 	kr = task_start_halt_locked(task, FALSE);
3100 	task_unlock(task);
3101 	return kr;
3102 }
3103 
/*
 *	task_start_halt_locked:
 *
 *	Hold the task and mark all of its other threads for termination.
 *	When should_mark_corpse is TRUE the threads are also flagged for
 *	inspection (corpse creation path).
 *
 *	Called and returns with the task locked.
 */
static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	/* Only the caller's own task, or a corpse fork, may be halted here. */
	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!should_mark_corpse &&
	    (task->halting || !task->active || !self->active)) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves. If should_mark_corpse is set, the corpse
		 * creation might have raced with exec, let the corpse
		 * creation continue, once the current thread reaches AST
		 * thread in exec will be woken up from task_complete_halt.
		 * Exec will fail cause the proc was marked for exit.
		 * Once the thread in exec reaches AST, it will call proc_exit
		 * and deliver the EXC_CORPSE_NOTIFY.
		 */
		return KERN_FAILURE;
	}

	/* Thread creation will fail after this point of no return. */
	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);
	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));

	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		/*
		 * Remove priority throttles for threads to terminate timely. This has
		 * to be done after task_hold_locked() traps all threads to AST, but before
		 * threads are marked inactive in thread_terminate_internal(). Takes thread
		 * mutex lock.
		 *
		 * We need task_is_a_corpse() check so that we don't accidently update policy
		 * for tasks that are doing posix_spawn().
		 *
		 * See: thread_policy_update_tasklocked().
		 */
		if (task_is_a_corpse(task)) {
			proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
		}

		if (should_mark_corpse) {
			/* Allow the thread to be inspected after it is terminated. */
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		if (thread != self) {
			thread_terminate_internal(thread);
		}
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	task_release_locked(task);

	return KERN_SUCCESS;
}
3183 
3184 
/*
 * task_complete_halt:
 *
 *	Complete task halt by waiting for threads to terminate, then clean
 *	up task resources (VM, port namespace, etc...) and then let the
 *	current thread go in the (practically empty) task context.
 *
 *	Note: task->halting flag is not cleared in order to avoid creation
 *	of new thread in old exec'ed task.
 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 *	Wait for the other threads to get shut down.
	 *      When the last other thread is reaped, we'll be
	 *	woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		/* We are the only thread left; nothing to wait for. */
		task_unlock(task);
	}

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup of task-level resources
	 *	associated with the current thread before
	 *	ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Terminate the IPC space.  A long time ago,
	 *	this used to be ipc_space_clean() which would
	 *	keep the space active but hollow it.
	 *
	 *	We really do not need this semantics given
	 *	tasks die with exec now.
	 */
	ipc_space_terminate(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_terminate(task->map);

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	iokit_task_terminate(task);
}
3250 
/*
 *	task_hold_locked:
 *
 *	Suspend execution of the specified task.
 *	This is a recursive-style suspension of the task, a count of
 *	suspends is maintained.
 *
 *	CONDITIONS: the task is locked and active.
 */
void
task_hold_locked(
	task_t          task)
{
	thread_t        thread;
	void *bsd_info = get_bsdtask_info(task);

	assert(task->active);

	/* Nested hold: only the first one does the actual work. */
	if (task->suspend_count++ > 0) {
		return;
	}

	if (bsd_info) {
		/* Let the BSD workqueue subsystem know the proc is suspended. */
		workq_proc_suspended(bsd_info);
	}

	/*
	 *	Iterate through all the threads and hold them.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_hold(thread);
		thread_mtx_unlock(thread);
	}
}
3286 
3287 /*
3288  *	task_hold:
3289  *
3290  *	Same as the internal routine above, except that is must lock
3291  *	and verify that the task is active.  This differs from task_suspend
3292  *	in that it places a kernel hold on the task rather than just a
3293  *	user-level hold.  This keeps users from over resuming and setting
3294  *	it running out from under the kernel.
3295  *
3296  *      CONDITIONS: the caller holds a reference on the task
3297  */
3298 kern_return_t
task_hold(task_t task)3299 task_hold(
3300 	task_t          task)
3301 {
3302 	if (task == TASK_NULL) {
3303 		return KERN_INVALID_ARGUMENT;
3304 	}
3305 
3306 	task_lock(task);
3307 
3308 	if (!task->active) {
3309 		task_unlock(task);
3310 
3311 		return KERN_FAILURE;
3312 	}
3313 
3314 	task_hold_locked(task);
3315 	task_unlock(task);
3316 
3317 	return KERN_SUCCESS;
3318 }
3319 
3320 kern_return_t
task_wait(task_t task,boolean_t until_not_runnable)3321 task_wait(
3322 	task_t          task,
3323 	boolean_t       until_not_runnable)
3324 {
3325 	if (task == TASK_NULL) {
3326 		return KERN_INVALID_ARGUMENT;
3327 	}
3328 
3329 	task_lock(task);
3330 
3331 	if (!task->active) {
3332 		task_unlock(task);
3333 
3334 		return KERN_FAILURE;
3335 	}
3336 
3337 	task_wait_locked(task, until_not_runnable);
3338 	task_unlock(task);
3339 
3340 	return KERN_SUCCESS;
3341 }
3342 
/*
 *	task_wait_locked:
 *
 *	Wait for all threads in task to stop.
 *
 * Conditions:
 *	Called with task locked, active, and held.
 */
void
task_wait_locked(
	task_t          task,
	boolean_t               until_not_runnable)
{
	thread_t        thread, self;

	assert(task->active);
	assert(task->suspend_count > 0);

	self = current_thread();

	/*
	 *	Iterate through all the threads and wait for them to
	 *	stop.  Do not wait for the current thread if it is within
	 *	the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != self) {
			thread_wait(thread, until_not_runnable);
		}
	}
}
3374 
/* Returns TRUE if the task has been pid-suspended (task->pidsuspended set). */
boolean_t
task_is_app_suspended(task_t task)
{
	return task->pidsuspended;
}
3380 
/*
 *	task_release_locked:
 *
 *	Release a kernel hold on a task.
 *	Recursive: only the hold count reaching zero releases the threads.
 *
 *      CONDITIONS: the task is locked and active
 */
void
task_release_locked(
	task_t          task)
{
	thread_t        thread;
	void *bsd_info = get_bsdtask_info(task);

	assert(task->active);
	assert(task->suspend_count > 0);

	/* Nested release: only the last one does the actual work. */
	if (--task->suspend_count > 0) {
		return;
	}

	if (bsd_info) {
		/* Let the BSD workqueue subsystem know the proc resumed. */
		workq_proc_resumed(bsd_info);
	}

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_release(thread);
		thread_mtx_unlock(thread);
	}
}
3412 
3413 /*
3414  *	task_release:
3415  *
3416  *	Same as the internal routine above, except that it must lock
3417  *	and verify that the task is active.
3418  *
3419  *      CONDITIONS: The caller holds a reference to the task
3420  */
3421 kern_return_t
task_release(task_t task)3422 task_release(
3423 	task_t          task)
3424 {
3425 	if (task == TASK_NULL) {
3426 		return KERN_INVALID_ARGUMENT;
3427 	}
3428 
3429 	task_lock(task);
3430 
3431 	if (!task->active) {
3432 		task_unlock(task);
3433 
3434 		return KERN_FAILURE;
3435 	}
3436 
3437 	task_release_locked(task);
3438 	task_unlock(task);
3439 
3440 	return KERN_SUCCESS;
3441 }
3442 
/*
 *	task_threads_internal:
 *
 *	Build an array of ports for all threads in the task, converted
 *	to the requested flavor (control/read/inspect).  On success the
 *	caller owns the kalloc'ed array (returned via threads_out, with
 *	its length in countp) and the port references stored in it.
 */
static kern_return_t
task_threads_internal(
	task_t                      task,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *countp,
	mach_thread_flavor_t        flavor)
{
	mach_msg_type_number_t  actual, count, count_needed;
	thread_t               *thread_list;
	thread_t                thread;
	unsigned int            i;

	count = 0;
	thread_list = NULL;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(flavor <= THREAD_FLAVOR_INSPECT);

	/*
	 * The thread count can change while the task is unlocked for the
	 * allocation, so loop: size the buffer for the current count,
	 * relock, and retry if more threads appeared in the meantime.
	 */
	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			kfree_type(thread_t, count, thread_list);
			return KERN_FAILURE;
		}

		count_needed = actual = task->thread_count;
		if (count_needed <= count) {
			break;
		}

		/* unlock the task and allocate more memory */
		task_unlock(task);

		kfree_type(thread_t, count, thread_list);
		count = count_needed;
		thread_list = kalloc_type(thread_t, count, Z_WAITOK);

		if (thread_list == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* task is locked here and thread_list holds >= thread_count slots */
	i = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		assert(i < actual);
		thread_reference(thread);
		thread_list[i++] = thread;
	}

	count_needed = actual;

	/* can unlock task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return null pointer and deallocate memory */

		*threads_out = NULL;
		*countp = 0;
		kfree_type(thread_t, count, thread_list);
	} else {
		/* if we allocated too much, must copy */
		if (count_needed < count) {
			void *newaddr;

			newaddr = kalloc_type(thread_t, count_needed, Z_WAITOK);
			if (newaddr == NULL) {
				for (i = 0; i < actual; ++i) {
					thread_deallocate(thread_list[i]);
				}
				kfree_type(thread_t, count, thread_list);
				return KERN_RESOURCE_SHORTAGE;
			}

			bcopy(thread_list, newaddr, count_needed * sizeof(thread_t));
			kfree_type(thread_t, count, thread_list);
			thread_list = (thread_t *)newaddr;
		}

		*threads_out = thread_list;
		*countp = actual;

		/*
		 * Do the conversion that Mig should handle: each thread ref
		 * in the array is replaced in place by a port of the
		 * requested flavor (the convert_* call presumably consumes
		 * the thread reference taken above -- confirm against the
		 * converters' contracts).
		 */

		switch (flavor) {
		case THREAD_FLAVOR_CONTROL:
			if (task == current_task()) {
				/* self ports get the pinned conversion */
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
				}
			} else {
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
				}
			}
			break;
		case THREAD_FLAVOR_READ:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
			}
			break;
		case THREAD_FLAVOR_INSPECT:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
			}
			break;
		}
	}

	return KERN_SUCCESS;
}
3559 
3560 kern_return_t
task_threads(task_t task,thread_act_array_t * threads_out,mach_msg_type_number_t * count)3561 task_threads(
3562 	task_t                      task,
3563 	thread_act_array_t         *threads_out,
3564 	mach_msg_type_number_t     *count)
3565 {
3566 	return task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3567 }
3568 
3569 
/*
 *	task_threads_from_user:
 *
 *	MIG-facing variant of task_threads().  The thread port flavor
 *	handed back is chosen from the kobject type of the task port the
 *	caller supplied (control/read/inspect), so a caller never
 *	receives thread ports more capable than its task port.
 */
kern_return_t
task_threads_from_user(
	mach_port_t                 port,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *count)
{
	ipc_kobject_type_t kotype;
	kern_return_t kr;

	/* produces a task reference, consumed by task_deallocate() below */
	task_t task = convert_port_to_task_inspect_no_eval(port);

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	kotype = ip_kotype(port);

	switch (kotype) {
	case IKOT_TASK_CONTROL:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
		break;
	case IKOT_TASK_READ:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
		break;
	case IKOT_TASK_INSPECT:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
		break;
	default:
		/* conversion above succeeded, so any other type is a kernel bug */
		panic("strange kobject type");
		break;
	}

	task_deallocate(task);
	return kr;
}
3605 
/* Hold modes for place_task_hold() / release_task_hold() */
#define TASK_HOLD_NORMAL        0       /* task_suspend_internal() / suspension-token holds */
#define TASK_HOLD_PIDSUSPEND    1       /* task_pidsuspend() / task_pidresume() hold */
#define TASK_HOLD_LEGACY        2       /* one legacy task_suspend() hold */
#define TASK_HOLD_LEGACY_ALL    3       /* release every outstanding legacy hold */
3610 
/*
 *	place_task_hold:
 *
 *	Record one user-level suspension of the given mode against the
 *	task and, on the 0 -> 1 user_stop_count transition, place the
 *	single kernel-level hold on the task's threads and wait for them
 *	to stop running user code.
 *
 *	Conditions: called with the task locked (every caller in this
 *	file takes task_lock() first).  Corpse tasks report success
 *	without taking a hold.
 */
static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
	    task_pid(task),
	    task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
	    task->user_stop_count, task->user_stop_count + 1);

#if MACH_ASSERT
	/* best-effort accounting of who is holding suspensions (debug only) */
	current_task()->suspends_outstanding++;
#endif

	/* legacy task_suspend() holds are additionally counted separately */
	if (mode == TASK_HOLD_LEGACY) {
		task->legacy_stop_count++;
	}

	if (task->user_stop_count++ > 0) {
		/*
		 *	If the stop count was positive, the task is
		 *	already stopped and we can exit.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold).  We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return KERN_SUCCESS;
}
3657 
/*
 *	release_task_hold:
 *
 *	Undo one user-level suspension of the given mode (or, for
 *	TASK_HOLD_LEGACY_ALL, every remaining legacy hold) and release
 *	the kernel-level hold on the task's threads when user_stop_count
 *	reaches zero.
 *
 *	Conditions: called with the task locked (every caller in this
 *	file takes task_lock() first).  Corpse tasks report success
 *	without touching any counts.
 */
static kern_return_t
release_task_hold(
	task_t          task,
	int                     mode)
{
	boolean_t release = FALSE;

	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	if (mode == TASK_HOLD_PIDSUSPEND) {
		if (task->pidsuspended == FALSE) {
			return KERN_FAILURE;
		}
		task->pidsuspended = FALSE;
	}

	/*
	 * An outstanding pidsuspend (cleared above if that is what we are
	 * releasing) accounts for one unit of user_stop_count that only
	 * task_pidresume() may release; anything beyond that is fair game.
	 */
	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
		    task->user_stop_count, mode, task->legacy_stop_count);

#if MACH_ASSERT
		/*
		 * This is obviously not robust; if we suspend one task and then resume a different one,
		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
		 * or buggy suspender.
		 */
		current_task()->suspends_outstanding--;
#endif

		if (mode == TASK_HOLD_LEGACY_ALL) {
			/* fold every legacy hold into one release */
			if (task->legacy_stop_count >= task->user_stop_count) {
				task->user_stop_count = 0;
				release = TRUE;
			} else {
				task->user_stop_count -= task->legacy_stop_count;
			}
			task->legacy_stop_count = 0;
		} else {
			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
				task->legacy_stop_count--;
			}
			if (--task->user_stop_count == 0) {
				release = TRUE;
			}
		}
	} else {
		return KERN_FAILURE;
	}

	/*
	 *	Release the task if necessary.
	 */
	if (release) {
		task_release_locked(task);
	}

	return KERN_SUCCESS;
}
3725 
3726 boolean_t
get_task_suspended(task_t task)3727 get_task_suspended(task_t task)
3728 {
3729 	return 0 != task->user_stop_count;
3730 }
3731 
/*
 *	task_suspend:
 *
 *	Implement an (old-fashioned) user-level suspension on a task.
 *
 *	Because the user isn't expecting to have to manage a suspension
 *	token, we'll track it for him in the kernel in the form of a naked
 *	send right to the task's resume port.  All such send rights
 *	account for a single suspension against the task (unlike task_suspend2()
 *	where each caller gets a unique suspension count represented by a
 *	unique send-once right).
 *
 * Conditions:
 *      The caller holds a reference to the task
 */
kern_return_t
task_suspend(
	task_t          task)
{
	kern_return_t                   kr;
	mach_port_t                     port;
	mach_port_name_t                name;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * place a legacy hold on the task.
	 */
	task_lock(task);
	kr = place_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Claim a send right on the task resume port, and request a no-senders
	 * notification on that port (if none outstanding).
	 */
	itk_lock(task);
	port = task->itk_resume;
	if (port == IP_NULL) {
		/* lazily create the resume port with a made send right */
		port = ipc_kobject_alloc_port(task, IKOT_TASK_RESUME,
		    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
		task->itk_resume = port;
	} else {
		(void)ipc_kobject_make_send_nsrequest(port, task, IKOT_TASK_RESUME);
	}
	itk_unlock(task);

	/*
	 * Copyout the send right into the calling task's IPC space.  It won't know it is there,
	 * but we'll look it up when calling a traditional resume.  Any IPC operations that
	 * deallocate the send right will auto-release the suspension.
	 */
	if (IP_VALID(port)) {
		kr = ipc_object_copyout(current_space(), ip_to_object(port),
		    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
		    NULL, NULL, &name);
	} else {
		kr = KERN_SUCCESS;
	}
	if (kr != KERN_SUCCESS) {
		/* hold remains in place; the no-senders path will clean it up */
		printf("warning: %s(%d) failed to copyout suspension "
		    "token for pid %d with error: %d\n",
		    proc_name_address(get_bsdtask_info(current_task())),
		    proc_pid(get_bsdtask_info(current_task())),
		    task_pid(task), kr);
	}

	return kr;
}
3807 
/*
 *	task_resume:
 *		Release a user hold on a task.
 *
 *	Looks up the naked send right that task_suspend() copied into the
 *	caller's IPC space and releases it alongside the legacy hold, so
 *	suspend/resume pairs stay balanced per caller.
 *
 * Conditions:
 *		The caller holds a reference to the task
 */
kern_return_t
task_resume(
	task_t  task)
{
	kern_return_t    kr;
	mach_port_name_t resume_port_name;
	ipc_entry_t              resume_port_entry;
	ipc_space_t              space = current_task()->itk_space;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/* release a legacy task hold */
	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	/* lock order: itk_lock before the caller's space lock */
	itk_lock(task); /* for itk_resume */
	is_write_lock(space); /* spin lock */
	if (is_active(space) && IP_VALID(task->itk_resume) &&
	    ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
		/*
		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
		 * we are holding one less legacy hold on the task from this caller.  If the release failed,
		 * go ahead and drop all the rights, as someone either already released our holds or the task
		 * is gone.
		 */
		itk_unlock(task);
		if (kr == KERN_SUCCESS) {
			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
		} else {
			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
		}
		/* space unlocked */
	} else {
		itk_unlock(task);
		is_write_unlock(space);
		if (kr == KERN_SUCCESS) {
			/* hold was released but no token was found in this caller's space */
			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
			    proc_name_address(get_bsdtask_info(current_task())), proc_pid(get_bsdtask_info(current_task())),
			    task_pid(task));
		}
	}

	return kr;
}
3862 
3863 /*
3864  * Suspend the target task.
3865  * Making/holding a token/reference/port is the callers responsibility.
3866  */
3867 kern_return_t
task_suspend_internal(task_t task)3868 task_suspend_internal(task_t task)
3869 {
3870 	kern_return_t    kr;
3871 
3872 	if (task == TASK_NULL || task == kernel_task) {
3873 		return KERN_INVALID_ARGUMENT;
3874 	}
3875 
3876 	task_lock(task);
3877 	kr = place_task_hold(task, TASK_HOLD_NORMAL);
3878 	task_unlock(task);
3879 	return kr;
3880 }
3881 
3882 /*
3883  * Suspend the target task, and return a suspension token. The token
3884  * represents a reference on the suspended task.
3885  */
3886 static kern_return_t
task_suspend2_grp(task_t task,task_suspension_token_t * suspend_token,task_grp_t grp)3887 task_suspend2_grp(
3888 	task_t                  task,
3889 	task_suspension_token_t *suspend_token,
3890 	task_grp_t              grp)
3891 {
3892 	kern_return_t    kr;
3893 
3894 	kr = task_suspend_internal(task);
3895 	if (kr != KERN_SUCCESS) {
3896 		*suspend_token = TASK_NULL;
3897 		return kr;
3898 	}
3899 
3900 	/*
3901 	 * Take a reference on the target task and return that to the caller
3902 	 * as a "suspension token," which can be converted into an SO right to
3903 	 * the now-suspended task's resume port.
3904 	 */
3905 	task_reference_grp(task, grp);
3906 	*suspend_token = task;
3907 
3908 	return KERN_SUCCESS;
3909 }
3910 
3911 kern_return_t
task_suspend2_mig(task_t task,task_suspension_token_t * suspend_token)3912 task_suspend2_mig(
3913 	task_t                  task,
3914 	task_suspension_token_t *suspend_token)
3915 {
3916 	return task_suspend2_grp(task, suspend_token, TASK_GRP_MIG);
3917 }
3918 
3919 kern_return_t
task_suspend2_external(task_t task,task_suspension_token_t * suspend_token)3920 task_suspend2_external(
3921 	task_t                  task,
3922 	task_suspension_token_t *suspend_token)
3923 {
3924 	return task_suspend2_grp(task, suspend_token, TASK_GRP_EXTERNAL);
3925 }
3926 
3927 /*
3928  * Resume the task
3929  * (reference/token/port management is caller's responsibility).
3930  */
3931 kern_return_t
task_resume_internal(task_suspension_token_t task)3932 task_resume_internal(
3933 	task_suspension_token_t         task)
3934 {
3935 	kern_return_t kr;
3936 
3937 	if (task == TASK_NULL || task == kernel_task) {
3938 		return KERN_INVALID_ARGUMENT;
3939 	}
3940 
3941 	task_lock(task);
3942 	kr = release_task_hold(task, TASK_HOLD_NORMAL);
3943 	task_unlock(task);
3944 	return kr;
3945 }
3946 
3947 /*
3948  * Resume the task using a suspension token. Consumes the token's ref.
3949  */
3950 static kern_return_t
task_resume2_grp(task_suspension_token_t task,task_grp_t grp)3951 task_resume2_grp(
3952 	task_suspension_token_t         task,
3953 	task_grp_t                      grp)
3954 {
3955 	kern_return_t kr;
3956 
3957 	kr = task_resume_internal(task);
3958 	task_suspension_token_deallocate_grp(task, grp);
3959 
3960 	return kr;
3961 }
3962 
3963 kern_return_t
task_resume2_mig(task_suspension_token_t task)3964 task_resume2_mig(
3965 	task_suspension_token_t         task)
3966 {
3967 	return task_resume2_grp(task, TASK_GRP_MIG);
3968 }
3969 
3970 kern_return_t
task_resume2_external(task_suspension_token_t task)3971 task_resume2_external(
3972 	task_suspension_token_t         task)
3973 {
3974 	return task_resume2_grp(task, TASK_GRP_EXTERNAL);
3975 }
3976 
/*
 *	task_suspension_no_senders:
 *
 *	No-senders notification on a task's resume port: every naked send
 *	right handed out by task_suspend() has been released, so drop the
 *	remaining legacy holds on the task.
 */
static void
task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	/* produces a token reference, dropped at the bottom */
	task_t task = convert_port_to_task_suspension_token(port);
	kern_return_t kr;

	if (task == TASK_NULL) {
		return;
	}

	if (task == kernel_task) {
		task_suspension_token_deallocate(task);
		return;
	}

	task_lock(task);

	/*
	 * Re-arm the no-senders request; KERN_FAILURE apparently means no
	 * new send rights were made since mscount was captured, so it is
	 * safe to drop the holds -- confirm against ipc_kobject_nsrequest().
	 */
	kr = ipc_kobject_nsrequest(port, mscount, NULL);
	if (kr == KERN_FAILURE) {
		/* release all the [remaining] outstanding legacy holds */
		release_task_hold(task, TASK_HOLD_LEGACY_ALL);
	}

	task_unlock(task);

	task_suspension_token_deallocate(task);         /* drop token reference */
}
4004 
/*
 * Fires when a send once made
 * by convert_task_suspension_token_to_port() dies.
 */
void
task_suspension_send_once(ipc_port_t port)
{
	/* produces a token reference, dropped at the bottom */
	task_t task = convert_port_to_task_suspension_token(port);

	/*
	 * NOTE(review): unlike task_suspension_no_senders(), the
	 * kernel_task path does not drop a token reference here --
	 * presumably the conversion cannot yield kernel_task for a
	 * send-once right; confirm.
	 */
	if (task == TASK_NULL || task == kernel_task) {
		return;         /* nothing to do */
	}

	/* release the hold held by this specific send-once right */
	task_lock(task);
	release_task_hold(task, TASK_HOLD_NORMAL);
	task_unlock(task);

	task_suspension_token_deallocate(task);         /* drop token reference */
}
4025 
4026 static kern_return_t
task_pidsuspend_locked(task_t task)4027 task_pidsuspend_locked(task_t task)
4028 {
4029 	kern_return_t kr;
4030 
4031 	if (task->pidsuspended) {
4032 		kr = KERN_FAILURE;
4033 		goto out;
4034 	}
4035 
4036 	task->pidsuspended = TRUE;
4037 
4038 	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4039 	if (kr != KERN_SUCCESS) {
4040 		task->pidsuspended = FALSE;
4041 	}
4042 out:
4043 	return kr;
4044 }
4045 
4046 
/*
 *	task_pidsuspend:
 *
 *	Suspends a task by placing a hold on its threads.
 *
 * Conditions:
 *      The caller holds a reference to the task
 */
kern_return_t
task_pidsuspend(
	task_t          task)
{
	kern_return_t    kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	kr = task_pidsuspend_locked(task);

	task_unlock(task);

	/*
	 * NOTE(review): message_app_suspended is read after the task lock
	 * is dropped; presumably a racy read is acceptable for this
	 * notification -- confirm.
	 */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

	return kr;
}
4077 
/*
 *	task_pidresume:
 *		Resumes a previously suspended task.
 *
 * Conditions:
 *		The caller holds a reference to the task
 */
kern_return_t
task_pidresume(
	task_t  task)
{
	kern_return_t    kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

#if CONFIG_FREEZE

	/*
	 * Serialize against a concurrent freeze/thaw: wait until no other
	 * thread is changing the freeze state, then claim it ourselves.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	task->changing_freeze_state = TRUE;
#endif

	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);

	task_unlock(task);

	/*
	 * NOTE(review): as in task_pidsuspend(), message_app_suspended is
	 * read after dropping the task lock -- confirm a racy read is
	 * acceptable here.
	 */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

#if CONFIG_FREEZE

	task_lock(task);

	/* a successful resume also thaws the task */
	if (kr == KERN_SUCCESS) {
		task->frozen = FALSE;
	}
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);
#endif

	return kr;
}
4132 
/* Refcount group for struct task_watchports allocations below */
os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4134 
/*
 *	task_add_turnstile_watchports:
 *		Setup watchports to boost the main thread of the task.
 *
 *	Arguments:
 *		task: task being spawned
 *		thread: main thread of task
 *		portwatch_ports: array of watchports
 *		portwatch_count: number of watchports
 *
 *	Conditions:
 *		Nothing locked.
 */
void
task_add_turnstile_watchports(
	task_t          task,
	thread_t        thread,
	ipc_port_t      *portwatch_ports,
	uint32_t        portwatch_count)
{
	struct task_watchports *watchports = NULL;
	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
	os_ref_count_t refs;

	/* Check if the task has terminated */
	if (!task->active) {
		return;
	}

	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);

	watchports = task_watchports_alloc_init(task, thread, portwatch_count);

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Setup watchports to boost the main thread */
	refs = task_add_turnstile_watchports_locked(task,
	    watchports, previous_elem_array, portwatch_ports,
	    portwatch_count);

	/* Drop the space lock */
	is_write_unlock(task->itk_space);

	/* no element attached; free the struct (and its task/thread refs) */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}

	/* Drop the ref on previous_elem_array */
	for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
		task_watchport_elem_deallocate(previous_elem_array[i]);
	}
}
4188 
/*
 *	task_remove_turnstile_watchports:
 *		Clear all turnstile boost on the task from watchports.
 *
 *	Arguments:
 *		task: task being terminated
 *
 *	Conditions:
 *		Nothing locked.
 */
void
task_remove_turnstile_watchports(
	task_t          task)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	struct task_watchports *watchports = NULL;
	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
	uint32_t portwatch_count;

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Check if watchport boost exist */
	if (task->watchports == NULL) {
		is_write_unlock(task->itk_space);
		return;
	}
	watchports = task->watchports;
	portwatch_count = watchports->tw_elem_array_count;

	/* ports cleared from the elems come back with a ref in port_freelist */
	refs = task_remove_turnstile_watchports_locked(task, watchports,
	    port_freelist);

	is_write_unlock(task->itk_space);

	/* Drop all the port references */
	for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
		ip_release(port_freelist[i]);
	}

	/* Clear the task and thread references for task_watchport */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4234 
/*
 *	task_transfer_turnstile_watchports:
 *		Transfer all watchport turnstile boost from old task to new task.
 *
 *	Arguments:
 *		old_task: task calling exec
 *		new_task: new exec'ed task
 *		thread: main thread of new task
 *
 *	Conditions:
 *		Nothing locked.
 */
void
task_transfer_turnstile_watchports(
	task_t   old_task,
	task_t   new_task,
	thread_t new_thread)
{
	struct task_watchports *old_watchports = NULL;
	struct task_watchports *new_watchports = NULL;
	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
	uint32_t portwatch_count;

	/* cheap unlocked early-out; re-checked under the locks below */
	if (old_task->watchports == NULL || !new_task->active) {
		return;
	}

	/* Get the watch port count from the old task */
	is_write_lock(old_task->itk_space);
	if (old_task->watchports == NULL) {
		is_write_unlock(old_task->itk_space);
		return;
	}

	portwatch_count = old_task->watchports->tw_elem_array_count;
	is_write_unlock(old_task->itk_space);

	/* allocated unlocked; may be discarded below if the re-check fails */
	new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);

	/* Lock the ipc space for old task */
	is_write_lock(old_task->itk_space);

	/* Lock the ipc space for new task */
	is_write_lock(new_task->itk_space);

	/* Check if watchport boost exist */
	if (old_task->watchports == NULL || !new_task->active) {
		is_write_unlock(new_task->itk_space);
		is_write_unlock(old_task->itk_space);
		(void)task_watchports_release(new_watchports);
		task_watchports_deallocate(new_watchports);
		return;
	}

	old_watchports = old_task->watchports;
	assert(portwatch_count == old_task->watchports->tw_elem_array_count);

	/* Setup new task watchports */
	new_task->watchports = new_watchports;

	for (uint32_t i = 0; i < portwatch_count; i++) {
		ipc_port_t port = old_watchports->tw_elem[i].twe_port;

		if (port == NULL) {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);

		task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);

		/*
		 * Swap the port's element from old to new only if the old
		 * element is still attached; on success the element ref
		 * moves from old_watchports to new_watchports.
		 */
		if (ipc_port_replace_watchport_elem_conditional_locked(port,
		    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&old_watchports->tw_elem[i]);

			task_watchports_retain(new_watchports);
			old_refs = task_watchports_release(old_watchports);

			/* Check if all ports are cleaned */
			if (old_refs == 0) {
				old_task->watchports = NULL;
			}
		} else {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
		}
		/* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
	}

	/* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
	new_refs = task_watchports_release(new_watchports);
	if (new_refs == 0) {
		new_task->watchports = NULL;
	}

	is_write_unlock(new_task->itk_space);
	is_write_unlock(old_task->itk_space);

	/* Clear the task and thread references for old_watchport */
	if (old_refs == 0) {
		task_watchports_deallocate(old_watchports);
	}

	/* Clear the task and thread references for new_watchport */
	if (new_refs == 0) {
		task_watchports_deallocate(new_watchports);
	}
}
4345 
/*
 *	task_add_turnstile_watchports_locked:
 *		Setup watchports to boost the main thread of the task.
 *
 *	Arguments:
 *		task: task to boost
 *		watchports: watchport structure to be attached to the task
 *		previous_elem_array: an array of old watchport_elem to be returned to caller
 *		portwatch_ports: array of watchports
 *		portwatch_count: number of watchports
 *
 *	Conditions:
 *		ipc space of the task locked.
 *		returns array of old watchport_elem in previous_elem_array
 */
static os_ref_count_t
task_add_turnstile_watchports_locked(
	task_t                      task,
	struct task_watchports      *watchports,
	struct task_watchport_elem  **previous_elem_array,
	ipc_port_t                  *portwatch_ports,
	uint32_t                    portwatch_count)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* Check if the task is still active */
	if (!task->active) {
		refs = task_watchports_release(watchports);
		return refs;
	}

	assert(task->watchports == NULL);
	task->watchports = watchports;

	/* i walks the input ports; j counts displaced elems handed back */
	for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
		ipc_port_t port = portwatch_ports[i];

		task_watchport_elem_init(&watchports->tw_elem[i], task, port);
		if (port == NULL) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}

		ip_mq_lock(port);

		/* Check if port is in valid state to be setup as watchport */
		if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
		    &previous_elem_array[j]) != KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}
		/* port unlocked on return */

		/* elem attached: it holds a port ref and a watchports ref */
		ip_reference(port);
		task_watchports_retain(watchports);
		if (previous_elem_array[j] != NULL) {
			j++;
		}
	}

	/* Drop the reference on task_watchport struct returned by os_ref_init */
	refs = task_watchports_release(watchports);
	if (refs == 0) {
		task->watchports = NULL;
	}

	return refs;
}
4414 
/*
 *	task_remove_turnstile_watchports_locked:
 *		Clear all turnstile boost on the task from watchports.
 *
 *	Arguments:
 *		task: task to remove watchports from
 *		watchports: watchports structure for the task
 *		port_freelist: array of ports returned with ref to caller
 *
 *
 *	Conditions:
 *		ipc space of the task locked.
 *		array of ports with refs are returned in port_freelist
 */
static os_ref_count_t
task_remove_turnstile_watchports_locked(
	task_t                 task,
	struct task_watchports *watchports,
	ipc_port_t             *port_freelist)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
		ipc_port_t port = watchports->tw_elem[i].twe_port;
		if (port == NULL) {
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);
		if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
		    &watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			/* the elem's port ref is handed back to the caller */
			port_freelist[j++] = port;
			refs = task_watchports_release(watchports);

			/* Check if all ports are cleaned */
			if (refs == 0) {
				task->watchports = NULL;
				break;
			}
		}
		/* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
	}
	return refs;
}
4461 
/*
 *	task_watchports_alloc_init:
 *		Allocate and initialize task watchport struct.
 *
 *	Takes a task reference and a thread reference that are dropped
 *	in task_watchports_deallocate().  Z_NOFAIL: the allocation
 *	cannot return NULL.
 *
 *	Conditions:
 *		Nothing locked.
 */
static struct task_watchports *
task_watchports_alloc_init(
	task_t        task,
	thread_t      thread,
	uint32_t      count)
{
	struct task_watchports *watchports = kalloc_type(struct task_watchports,
	    struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	task_reference(task);
	thread_reference(thread);
	watchports->tw_task = task;
	watchports->tw_thread = thread;
	watchports->tw_elem_array_count = count;
	os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);

	return watchports;
}
4487 
4488 /*
4489  *	task_watchports_deallocate:
4490  *		Deallocate task watchport struct.
4491  *
4492  *	Conditions:
4493  *		Nothing locked.
4494  */
4495 static void
task_watchports_deallocate(struct task_watchports * watchports)4496 task_watchports_deallocate(
4497 	struct task_watchports *watchports)
4498 {
4499 	uint32_t portwatch_count = watchports->tw_elem_array_count;
4500 
4501 	task_deallocate(watchports->tw_task);
4502 	thread_deallocate(watchports->tw_thread);
4503 	kfree_type(struct task_watchports, struct task_watchport_elem,
4504 	    portwatch_count, watchports);
4505 }
4506 
/*
 *	task_watchport_elem_deallocate:
 *		Deallocate task watchport element and release its ref on task_watchport.
 *
 *	Conditions:
 *		Nothing locked.
 */
void
task_watchport_elem_deallocate(
	struct task_watchport_elem *watchport_elem)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	task_t task = watchport_elem->twe_task;
	struct task_watchports *watchports = NULL;
	ipc_port_t port = NULL;

	assert(task != NULL);

	/* Take the space lock to modify the elememt */
	is_write_lock(task->itk_space);

	watchports = task->watchports;
	assert(watchports != NULL);

	port = watchport_elem->twe_port;
	assert(port != NULL);

	task_watchport_elem_clear(watchport_elem);
	refs = task_watchports_release(watchports);

	if (refs == 0) {
		task->watchports = NULL;
	}

	is_write_unlock(task->itk_space);

	/* drop the elem's port ref; free the struct if this was the last elem */
	ip_release(port);
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4548 
4549 /*
4550  *	task_has_watchports:
4551  *		Return TRUE if task has watchport boosts.
4552  *
4553  *	Conditions:
4554  *		Nothing locked.
4555  */
4556 boolean_t
task_has_watchports(task_t task)4557 task_has_watchports(task_t task)
4558 {
4559 	return task->watchports != NULL;
4560 }
4561 
4562 #if DEVELOPMENT || DEBUG
4563 
4564 extern void IOSleep(int);
4565 
4566 kern_return_t
task_disconnect_page_mappings(task_t task)4567 task_disconnect_page_mappings(task_t task)
4568 {
4569 	int     n;
4570 
4571 	if (task == TASK_NULL || task == kernel_task) {
4572 		return KERN_INVALID_ARGUMENT;
4573 	}
4574 
4575 	/*
4576 	 * this function is used to strip all of the mappings from
4577 	 * the pmap for the specified task to force the task to
4578 	 * re-fault all of the pages it is actively using... this
4579 	 * allows us to approximate the true working set of the
4580 	 * specified task.  We only engage if at least 1 of the
4581 	 * threads in the task is runnable, but we want to continuously
4582 	 * sweep (at least for a while - I've arbitrarily set the limit at
4583 	 * 100 sweeps to be re-looked at as we gain experience) to get a better
4584 	 * view into what areas within a page are being visited (as opposed to only
4585 	 * seeing the first fault of a page after the task becomes
4586 	 * runnable)...  in the future I may
4587 	 * try to block until awakened by a thread in this task
4588 	 * being made runnable, but for now we'll periodically poll from the
4589 	 * user level debug tool driving the sysctl
4590 	 */
4591 	for (n = 0; n < 100; n++) {
4592 		thread_t        thread;
4593 		boolean_t       runnable;
4594 		boolean_t       do_unnest;
4595 		int             page_count;
4596 
4597 		runnable = FALSE;
4598 		do_unnest = FALSE;
4599 
4600 		task_lock(task);
4601 
4602 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
4603 			if (thread->state & TH_RUN) {
4604 				runnable = TRUE;
4605 				break;
4606 			}
4607 		}
4608 		if (n == 0) {
4609 			task->task_disconnected_count++;
4610 		}
4611 
4612 		if (task->task_unnested == FALSE) {
4613 			if (runnable == TRUE) {
4614 				task->task_unnested = TRUE;
4615 				do_unnest = TRUE;
4616 			}
4617 		}
4618 		task_unlock(task);
4619 
4620 		if (runnable == FALSE) {
4621 			break;
4622 		}
4623 
4624 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4625 		    task, do_unnest, task->task_disconnected_count, 0, 0);
4626 
4627 		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4628 
4629 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4630 		    task, page_count, 0, 0, 0);
4631 
4632 		if ((n % 5) == 4) {
4633 			IOSleep(1);
4634 		}
4635 	}
4636 	return KERN_SUCCESS;
4637 }
4638 
4639 #endif
4640 
4641 
4642 #if CONFIG_FREEZE
4643 
4644 /*
4645  *	task_freeze:
4646  *
4647  *	Freeze a task.
4648  *
4649  * Conditions:
4650  *      The caller holds a reference to the task
4651  */
4652 extern void     vm_wake_compactor_swapper(void);
4653 extern struct freezer_context freezer_context_global;
4654 
kern_return_t
task_freeze(
	task_t    task,
	uint32_t           *purgeable_count,
	uint32_t           *wired_count,
	uint32_t           *clean_count,
	uint32_t           *dirty_count,
	uint32_t           dirty_budget,
	uint32_t           *shared_count,
	int                *freezer_error_code,
	boolean_t          eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Serialize with any other thread transitioning this task's freeze
	 * state: sleep until changing_freeze_state clears, re-checking after
	 * every wakeup since the lock is dropped while blocked.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (task->frozen) {
		/* Already frozen: nothing to do. */
		task_unlock(task);
		return KERN_FAILURE;
	}
	/* Claim the freeze-state transition for this thread. */
	task->changing_freeze_state = TRUE;

	/* Publish the task being frozen for the freezer context. */
	freezer_context_global.freezer_ctx_task = task;

	/* Drop the task lock across the (potentially long) VM freeze. */
	task_unlock(task);

	kr = vm_map_freeze(task,
	    purgeable_count,
	    wired_count,
	    clean_count,
	    dirty_count,
	    dirty_budget,
	    shared_count,
	    freezer_error_code,
	    eval_only);

	task_lock(task);

	/* Only a real (non-evaluation) successful freeze marks the task frozen. */
	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;

		/* This freeze session is complete; clear the global context. */
		freezer_context_global.freezer_ctx_task = NULL;
		freezer_context_global.freezer_ctx_uncompressed_pages = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64);         /*used to track pageouts*/
		}

		freezer_context_global.freezer_ctx_swapped_bytes = 0;
	}

	/* Release the transition claim and wake any waiters in the loop above. */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (kr == KERN_SUCCESS) &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kind of "per-task packed c_segs"
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&vm_swapout_thread);
	}

	return kr;
}
4743 
4744 /*
4745  *	task_thaw:
4746  *
4747  *	Thaw a currently frozen task.
4748  *
4749  * Conditions:
4750  *      The caller holds a reference to the task
4751  */
kern_return_t
task_thaw(
	task_t          task)
{
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Wait out any in-flight freeze-state transition before inspecting
	 * task->frozen; re-check after every wakeup since the lock is
	 * dropped while blocked.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (!task->frozen) {
		/* Not frozen: nothing to thaw. */
		task_unlock(task);
		return KERN_FAILURE;
	}
	task->frozen = FALSE;

	task_unlock(task);

	return KERN_SUCCESS;
}
4779 
4780 void
task_update_frozen_to_swap_acct(task_t task,int64_t amount,freezer_acct_op_t op)4781 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
4782 {
4783 	/*
4784 	 * We don't assert that the task lock is held because we call this
4785 	 * routine from the decompression path and we won't be holding the
4786 	 * task lock. However, since we are in the context of the task we are
4787 	 * safe.
4788 	 * In the case of the task_freeze path, we call it from behind the task
4789 	 * lock but we don't need to because we have a reference on the proc
4790 	 * being frozen.
4791 	 */
4792 
4793 	assert(task);
4794 	if (amount == 0) {
4795 		return;
4796 	}
4797 
4798 	if (op == CREDIT_TO_SWAP) {
4799 		ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4800 	} else if (op == DEBIT_FROM_SWAP) {
4801 		ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4802 	} else {
4803 		panic("task_update_frozen_to_swap_acct: Invalid ledger op");
4804 	}
4805 }
4806 #endif /* CONFIG_FREEZE */
4807 
4808 kern_return_t
task_set_security_tokens(task_t task,security_token_t sec_token,audit_token_t audit_token,host_priv_t host_priv)4809 task_set_security_tokens(
4810 	task_t           task,
4811 	security_token_t sec_token,
4812 	audit_token_t    audit_token,
4813 	host_priv_t      host_priv)
4814 {
4815 	ipc_port_t       host_port;
4816 	kern_return_t    kr;
4817 
4818 	if (task == TASK_NULL) {
4819 		return KERN_INVALID_ARGUMENT;
4820 	}
4821 
4822 	task_lock(task);
4823 	task_set_tokens(task, &sec_token, &audit_token);
4824 	task_unlock(task);
4825 
4826 	if (host_priv != HOST_PRIV_NULL) {
4827 		kr = host_get_host_priv_port(host_priv, &host_port);
4828 	} else {
4829 		kr = host_get_host_port(host_priv_self(), &host_port);
4830 	}
4831 	assert(kr == KERN_SUCCESS);
4832 
4833 	kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4834 	return kr;
4835 }
4836 
kern_return_t
task_send_trace_memory(
	__unused task_t   target_task,
	__unused uint32_t pid,
	__unused uint64_t uniqueid)
{
	/* Stub: this operation is unconditionally rejected. */
	return KERN_INVALID_ARGUMENT;
}
4845 
4846 /*
4847  * This routine was added, pretty much exclusively, for registering the
4848  * RPC glue vector for in-kernel short circuited tasks.  Rather than
4849  * removing it completely, I have only disabled that feature (which was
4850  * the only feature at the time).  It just appears that we are going to
4851  * want to add some user data to tasks in the future (i.e. bsd info,
4852  * task names, etc...), so I left it in the formal task interface.
4853  */
4854 kern_return_t
task_set_info(task_t task,task_flavor_t flavor,__unused task_info_t task_info_in,__unused mach_msg_type_number_t task_info_count)4855 task_set_info(
4856 	task_t          task,
4857 	task_flavor_t   flavor,
4858 	__unused task_info_t    task_info_in,           /* pointer to IN array */
4859 	__unused mach_msg_type_number_t task_info_count)
4860 {
4861 	if (task == TASK_NULL) {
4862 		return KERN_INVALID_ARGUMENT;
4863 	}
4864 	switch (flavor) {
4865 #if CONFIG_ATM
4866 	case TASK_TRACE_MEMORY_INFO:
4867 		return KERN_NOT_SUPPORTED;
4868 #endif // CONFIG_ATM
4869 	default:
4870 		return KERN_INVALID_ARGUMENT;
4871 	}
4872 }
4873 
4874 static void
_task_fill_times(task_t task,time_value_t * user_time,time_value_t * sys_time)4875 _task_fill_times(task_t task, time_value_t *user_time, time_value_t *sys_time)
4876 {
4877 	clock_sec_t sec;
4878 	clock_usec_t usec;
4879 
4880 	struct recount_times_mach times = recount_task_terminated_times(task);
4881 	absolutetime_to_microtime(times.rtm_user, &sec, &usec);
4882 	user_time->seconds = (typeof(user_time->seconds))sec;
4883 	user_time->microseconds = usec;
4884 	absolutetime_to_microtime(times.rtm_system, &sec, &usec);
4885 	sys_time->seconds = (typeof(sys_time->seconds))sec;
4886 	sys_time->microseconds = usec;
4887 }
4888 
/*
 * Workaround toggle for rdar://20146450 — presumably consulted by
 * task_info() below; confirm usage before changing.
 */
int radar_20146450 = 1;
4890 kern_return_t
task_info(task_t task,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)4891 task_info(
4892 	task_t                  task,
4893 	task_flavor_t           flavor,
4894 	task_info_t             task_info_out,
4895 	mach_msg_type_number_t  *task_info_count)
4896 {
4897 	kern_return_t error = KERN_SUCCESS;
4898 	mach_msg_type_number_t  original_task_info_count;
4899 	bool is_kernel_task = (task == kernel_task);
4900 
4901 	if (task == TASK_NULL) {
4902 		return KERN_INVALID_ARGUMENT;
4903 	}
4904 
4905 	original_task_info_count = *task_info_count;
4906 	task_lock(task);
4907 
4908 	if (task != current_task() && !task->active) {
4909 		task_unlock(task);
4910 		return KERN_INVALID_ARGUMENT;
4911 	}
4912 
4913 
4914 	switch (flavor) {
4915 	case TASK_BASIC_INFO_32:
4916 	case TASK_BASIC2_INFO_32:
4917 #if defined(__arm64__)
4918 	case TASK_BASIC_INFO_64:
4919 #endif
4920 		{
4921 			task_basic_info_32_t basic_info;
4922 			ledger_amount_t      tmp;
4923 
4924 			if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4925 				error = KERN_INVALID_ARGUMENT;
4926 				break;
4927 			}
4928 
4929 			basic_info = (task_basic_info_32_t)task_info_out;
4930 
4931 			basic_info->virtual_size = (typeof(basic_info->virtual_size))
4932 			    vm_map_adjusted_size(is_kernel_task ? kernel_map : task->map);
4933 			if (flavor == TASK_BASIC2_INFO_32) {
4934 				/*
4935 				 * The "BASIC2" flavor gets the maximum resident
4936 				 * size instead of the current resident size...
4937 				 */
4938 				ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
4939 			} else {
4940 				ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
4941 			}
4942 			basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
4943 
4944 			_task_fill_times(task, &basic_info->user_time,
4945 			    &basic_info->system_time);
4946 
4947 			basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
4948 			basic_info->suspend_count = task->user_stop_count;
4949 
4950 			*task_info_count = TASK_BASIC_INFO_32_COUNT;
4951 			break;
4952 		}
4953 
4954 #if defined(__arm64__)
4955 	case TASK_BASIC_INFO_64_2:
4956 	{
4957 		task_basic_info_64_2_t  basic_info;
4958 
4959 		if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4960 			error = KERN_INVALID_ARGUMENT;
4961 			break;
4962 		}
4963 
4964 		basic_info = (task_basic_info_64_2_t)task_info_out;
4965 
4966 		basic_info->virtual_size  = vm_map_adjusted_size(is_kernel_task ?
4967 		    kernel_map : task->map);
4968 		ledger_get_balance(task->ledger, task_ledgers.phys_mem,
4969 		    (ledger_amount_t *)&basic_info->resident_size);
4970 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
4971 		basic_info->suspend_count = task->user_stop_count;
4972 		_task_fill_times(task, &basic_info->user_time,
4973 		    &basic_info->system_time);
4974 
4975 		*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4976 		break;
4977 	}
4978 
4979 #else /* defined(__arm64__) */
4980 	case TASK_BASIC_INFO_64:
4981 	{
4982 		task_basic_info_64_t basic_info;
4983 
4984 		if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4985 			error = KERN_INVALID_ARGUMENT;
4986 			break;
4987 		}
4988 
4989 		basic_info = (task_basic_info_64_t)task_info_out;
4990 
4991 		basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
4992 		    kernel_map : task->map);
4993 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
4994 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
4995 		basic_info->suspend_count = task->user_stop_count;
4996 		_task_fill_times(task, &basic_info->user_time,
4997 		    &basic_info->system_time);
4998 
4999 		*task_info_count = TASK_BASIC_INFO_64_COUNT;
5000 		break;
5001 	}
5002 #endif /* defined(__arm64__) */
5003 
5004 	case MACH_TASK_BASIC_INFO:
5005 	{
5006 		mach_task_basic_info_t  basic_info;
5007 
5008 		if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5009 			error = KERN_INVALID_ARGUMENT;
5010 			break;
5011 		}
5012 
5013 		basic_info = (mach_task_basic_info_t)task_info_out;
5014 
5015 		basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5016 		    kernel_map : task->map);
5017 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
5018 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
5019 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5020 		basic_info->suspend_count = task->user_stop_count;
5021 		_task_fill_times(task, &basic_info->user_time,
5022 		    &basic_info->system_time);
5023 
5024 		*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5025 		break;
5026 	}
5027 
5028 	case TASK_THREAD_TIMES_INFO:
5029 	{
5030 		task_thread_times_info_t times_info;
5031 		thread_t                 thread;
5032 
5033 		if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5034 			error = KERN_INVALID_ARGUMENT;
5035 			break;
5036 		}
5037 
5038 		times_info = (task_thread_times_info_t)task_info_out;
5039 		times_info->user_time = (time_value_t){ 0 };
5040 		times_info->system_time = (time_value_t){ 0 };
5041 
5042 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5043 			if ((thread->options & TH_OPT_IDLE_THREAD) == 0) {
5044 				time_value_t user_time, system_time;
5045 
5046 				thread_read_times(thread, &user_time, &system_time, NULL);
5047 				time_value_add(&times_info->user_time, &user_time);
5048 				time_value_add(&times_info->system_time, &system_time);
5049 			}
5050 		}
5051 
5052 		*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5053 		break;
5054 	}
5055 
5056 	case TASK_ABSOLUTETIME_INFO:
5057 	{
5058 		task_absolutetime_info_t        info;
5059 
5060 		if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5061 			error = KERN_INVALID_ARGUMENT;
5062 			break;
5063 		}
5064 
5065 		info = (task_absolutetime_info_t)task_info_out;
5066 
5067 		struct recount_times_mach term_times =
5068 		    recount_task_terminated_times(task);
5069 		struct recount_times_mach total_times = recount_task_times(task);
5070 
5071 		info->total_user = total_times.rtm_user;
5072 		info->total_system = total_times.rtm_system;
5073 		info->threads_user = total_times.rtm_user - term_times.rtm_user;
5074 		info->threads_system += total_times.rtm_system - term_times.rtm_system;
5075 
5076 		*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5077 		break;
5078 	}
5079 
5080 	case TASK_DYLD_INFO:
5081 	{
5082 		task_dyld_info_t info;
5083 
5084 		/*
5085 		 * We added the format field to TASK_DYLD_INFO output.  For
5086 		 * temporary backward compatibility, accept the fact that
5087 		 * clients may ask for the old version - distinquished by the
5088 		 * size of the expected result structure.
5089 		 */
5090 #define TASK_LEGACY_DYLD_INFO_COUNT \
5091 	        offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5092 
5093 		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5094 			error = KERN_INVALID_ARGUMENT;
5095 			break;
5096 		}
5097 
5098 		info = (task_dyld_info_t)task_info_out;
5099 		info->all_image_info_addr = task->all_image_info_addr;
5100 		info->all_image_info_size = task->all_image_info_size;
5101 
5102 		/* only set format on output for those expecting it */
5103 		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5104 			info->all_image_info_format = task_has_64Bit_addr(task) ?
5105 			    TASK_DYLD_ALL_IMAGE_INFO_64 :
5106 			    TASK_DYLD_ALL_IMAGE_INFO_32;
5107 			*task_info_count = TASK_DYLD_INFO_COUNT;
5108 		} else {
5109 			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5110 		}
5111 		break;
5112 	}
5113 
5114 	case TASK_EXTMOD_INFO:
5115 	{
5116 		task_extmod_info_t info;
5117 		void *p;
5118 
5119 		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5120 			error = KERN_INVALID_ARGUMENT;
5121 			break;
5122 		}
5123 
5124 		info = (task_extmod_info_t)task_info_out;
5125 
5126 		p = get_bsdtask_info(task);
5127 		if (p) {
5128 			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5129 		} else {
5130 			bzero(info->task_uuid, sizeof(info->task_uuid));
5131 		}
5132 		info->extmod_statistics = task->extmod_statistics;
5133 		*task_info_count = TASK_EXTMOD_INFO_COUNT;
5134 
5135 		break;
5136 	}
5137 
5138 	case TASK_KERNELMEMORY_INFO:
5139 	{
5140 		task_kernelmemory_info_t        tkm_info;
5141 		ledger_amount_t                 credit, debit;
5142 
5143 		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5144 			error = KERN_INVALID_ARGUMENT;
5145 			break;
5146 		}
5147 
5148 		tkm_info = (task_kernelmemory_info_t) task_info_out;
5149 		tkm_info->total_palloc = 0;
5150 		tkm_info->total_pfree = 0;
5151 		tkm_info->total_salloc = 0;
5152 		tkm_info->total_sfree = 0;
5153 
5154 		if (task == kernel_task) {
5155 			/*
5156 			 * All shared allocs/frees from other tasks count against
5157 			 * the kernel private memory usage.  If we are looking up
5158 			 * info for the kernel task, gather from everywhere.
5159 			 */
5160 			task_unlock(task);
5161 
5162 			/* start by accounting for all the terminated tasks against the kernel */
5163 			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5164 			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5165 
5166 			/* count all other task/thread shared alloc/free against the kernel */
5167 			lck_mtx_lock(&tasks_threads_lock);
5168 
5169 			/* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5170 			queue_iterate(&tasks, task, task_t, tasks) {
5171 				if (task == kernel_task) {
5172 					if (ledger_get_entries(task->ledger,
5173 					    task_ledgers.tkm_private, &credit,
5174 					    &debit) == KERN_SUCCESS) {
5175 						tkm_info->total_palloc += credit;
5176 						tkm_info->total_pfree += debit;
5177 					}
5178 				}
5179 				if (!ledger_get_entries(task->ledger,
5180 				    task_ledgers.tkm_shared, &credit, &debit)) {
5181 					tkm_info->total_palloc += credit;
5182 					tkm_info->total_pfree += debit;
5183 				}
5184 			}
5185 			lck_mtx_unlock(&tasks_threads_lock);
5186 		} else {
5187 			if (!ledger_get_entries(task->ledger,
5188 			    task_ledgers.tkm_private, &credit, &debit)) {
5189 				tkm_info->total_palloc = credit;
5190 				tkm_info->total_pfree = debit;
5191 			}
5192 			if (!ledger_get_entries(task->ledger,
5193 			    task_ledgers.tkm_shared, &credit, &debit)) {
5194 				tkm_info->total_salloc = credit;
5195 				tkm_info->total_sfree = debit;
5196 			}
5197 			task_unlock(task);
5198 		}
5199 
5200 		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5201 		return KERN_SUCCESS;
5202 	}
5203 
5204 	/* OBSOLETE */
5205 	case TASK_SCHED_FIFO_INFO:
5206 	{
5207 		if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5208 			error = KERN_INVALID_ARGUMENT;
5209 			break;
5210 		}
5211 
5212 		error = KERN_INVALID_POLICY;
5213 		break;
5214 	}
5215 
5216 	/* OBSOLETE */
5217 	case TASK_SCHED_RR_INFO:
5218 	{
5219 		policy_rr_base_t        rr_base;
5220 		uint32_t quantum_time;
5221 		uint64_t quantum_ns;
5222 
5223 		if (*task_info_count < POLICY_RR_BASE_COUNT) {
5224 			error = KERN_INVALID_ARGUMENT;
5225 			break;
5226 		}
5227 
5228 		rr_base = (policy_rr_base_t) task_info_out;
5229 
5230 		if (task != kernel_task) {
5231 			error = KERN_INVALID_POLICY;
5232 			break;
5233 		}
5234 
5235 		rr_base->base_priority = task->priority;
5236 
5237 		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5238 		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5239 
5240 		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5241 
5242 		*task_info_count = POLICY_RR_BASE_COUNT;
5243 		break;
5244 	}
5245 
5246 	/* OBSOLETE */
5247 	case TASK_SCHED_TIMESHARE_INFO:
5248 	{
5249 		policy_timeshare_base_t ts_base;
5250 
5251 		if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5252 			error = KERN_INVALID_ARGUMENT;
5253 			break;
5254 		}
5255 
5256 		ts_base = (policy_timeshare_base_t) task_info_out;
5257 
5258 		if (task == kernel_task) {
5259 			error = KERN_INVALID_POLICY;
5260 			break;
5261 		}
5262 
5263 		ts_base->base_priority = task->priority;
5264 
5265 		*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5266 		break;
5267 	}
5268 
5269 	case TASK_SECURITY_TOKEN:
5270 	{
5271 		security_token_t        *sec_token_p;
5272 
5273 		if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5274 			error = KERN_INVALID_ARGUMENT;
5275 			break;
5276 		}
5277 
5278 		sec_token_p = (security_token_t *) task_info_out;
5279 
5280 		*sec_token_p = *task_get_sec_token(task);
5281 
5282 		*task_info_count = TASK_SECURITY_TOKEN_COUNT;
5283 		break;
5284 	}
5285 
5286 	case TASK_AUDIT_TOKEN:
5287 	{
5288 		audit_token_t   *audit_token_p;
5289 
5290 		if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5291 			error = KERN_INVALID_ARGUMENT;
5292 			break;
5293 		}
5294 
5295 		audit_token_p = (audit_token_t *) task_info_out;
5296 
5297 		*audit_token_p = *task_get_audit_token(task);
5298 
5299 		*task_info_count = TASK_AUDIT_TOKEN_COUNT;
5300 		break;
5301 	}
5302 
5303 	case TASK_SCHED_INFO:
5304 		error = KERN_INVALID_ARGUMENT;
5305 		break;
5306 
5307 	case TASK_EVENTS_INFO:
5308 	{
5309 		task_events_info_t      events_info;
5310 		thread_t                thread;
5311 		uint64_t                n_syscalls_mach, n_syscalls_unix, n_csw;
5312 
5313 		if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5314 			error = KERN_INVALID_ARGUMENT;
5315 			break;
5316 		}
5317 
5318 		events_info = (task_events_info_t) task_info_out;
5319 
5320 
5321 		events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5322 		events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5323 		events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5324 		events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5325 		events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5326 
5327 		n_syscalls_mach = task->syscalls_mach;
5328 		n_syscalls_unix = task->syscalls_unix;
5329 		n_csw = task->c_switch;
5330 
5331 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5332 			n_csw           += thread->c_switch;
5333 			n_syscalls_mach += thread->syscalls_mach;
5334 			n_syscalls_unix += thread->syscalls_unix;
5335 		}
5336 
5337 		events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5338 		events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5339 		events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5340 
5341 		*task_info_count = TASK_EVENTS_INFO_COUNT;
5342 		break;
5343 	}
5344 	case TASK_AFFINITY_TAG_INFO:
5345 	{
5346 		if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5347 			error = KERN_INVALID_ARGUMENT;
5348 			break;
5349 		}
5350 
5351 		error = task_affinity_info(task, task_info_out, task_info_count);
5352 		break;
5353 	}
5354 	case TASK_POWER_INFO:
5355 	{
5356 		if (*task_info_count < TASK_POWER_INFO_COUNT) {
5357 			error = KERN_INVALID_ARGUMENT;
5358 			break;
5359 		}
5360 
5361 		task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5362 		break;
5363 	}
5364 
5365 	case TASK_POWER_INFO_V2:
5366 	{
5367 		if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5368 			error = KERN_INVALID_ARGUMENT;
5369 			break;
5370 		}
5371 		task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5372 		task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5373 		break;
5374 	}
5375 
5376 	case TASK_VM_INFO:
5377 	case TASK_VM_INFO_PURGEABLE:
5378 	{
5379 		task_vm_info_t          vm_info;
5380 		vm_map_t                map;
5381 		ledger_amount_t         tmp_amount;
5382 
5383 		struct proc *p;
5384 		uint32_t platform, sdk;
5385 		p = current_proc();
5386 		platform = proc_platform(p);
5387 		sdk = proc_sdk(p);
5388 		if (original_task_info_count > TASK_VM_INFO_COUNT) {
5389 			/*
5390 			 * Some iOS apps pass an incorrect value for
5391 			 * task_info_count, expressed in number of bytes
5392 			 * instead of number of "natural_t" elements, which
5393 			 * can lead to binary compatibility issues (including
5394 			 * stack corruption) when the data structure is
5395 			 * expanded in the future.
5396 			 * Let's make this potential issue visible by
5397 			 * logging about it...
5398 			 */
5399 			printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5400 			    "task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5401 			    "%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5402 			    __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5403 			    flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5404 			    platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5405 			DTRACE_VM4(suspicious_task_vm_info_count,
5406 			    mach_msg_type_number_t, original_task_info_count,
5407 			    mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5408 			    uint32_t, platform,
5409 			    uint32_t, sdk);
5410 		}
5411 #if __arm64__
5412 		if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5413 		    platform == PLATFORM_IOS &&
5414 		    sdk != 0 &&
5415 		    (sdk >> 16) <= 12) {
5416 			/*
5417 			 * Some iOS apps pass an incorrect value for
5418 			 * task_info_count, expressed in number of bytes
5419 			 * instead of number of "natural_t" elements.
5420 			 * For the sake of backwards binary compatibility
5421 			 * for apps built with an iOS12 or older SDK and using
5422 			 * the "rev2" data structure, let's fix task_info_count
5423 			 * for them, to avoid stomping past the actual end
5424 			 * of their buffer.
5425 			 */
5426 #if DEVELOPMENT || DEBUG
5427 			printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5428 			    "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5429 			    proc_name_address(p), original_task_info_count,
5430 			    TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16),
5431 			    ((sdk >> 8) & 0xff), (sdk & 0xff));
5432 #endif /* DEVELOPMENT || DEBUG */
5433 			DTRACE_VM4(workaround_task_vm_info_count,
5434 			    mach_msg_type_number_t, original_task_info_count,
5435 			    mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5436 			    uint32_t, platform,
5437 			    uint32_t, sdk);
5438 			original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5439 			*task_info_count = original_task_info_count;
5440 		}
5441 		if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5442 		    platform == PLATFORM_IOS &&
5443 		    sdk != 0 &&
5444 		    (sdk >> 16) <= 15) {
5445 			/*
5446 			 * Some iOS apps pass an incorrect value for
5447 			 * task_info_count, expressed in number of bytes
5448 			 * instead of number of "natural_t" elements.
5449 			 */
5450 			printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5451 			    "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5452 			    proc_name_address(p), original_task_info_count,
5453 			    TASK_VM_INFO_REV5_COUNT, platform, (sdk >> 16),
5454 			    ((sdk >> 8) & 0xff), (sdk & 0xff));
5455 			DTRACE_VM4(workaround_task_vm_info_count,
5456 			    mach_msg_type_number_t, original_task_info_count,
5457 			    mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5458 			    uint32_t, platform,
5459 			    uint32_t, sdk);
5460 #if DEVELOPMENT || DEBUG
5461 			/*
5462 			 * For the sake of internal builds livability,
5463 			 * work around this user-space bug by capping the
5464 			 * buffer's size to what it was with the iOS15 SDK.
5465 			 */
5466 			original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5467 			*task_info_count = original_task_info_count;
5468 #endif /* DEVELOPMENT || DEBUG */
5469 		}
5470 #endif /* __arm64__ */
5471 
5472 		if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5473 			error = KERN_INVALID_ARGUMENT;
5474 			break;
5475 		}
5476 
5477 		vm_info = (task_vm_info_t)task_info_out;
5478 
5479 		/*
5480 		 * Do not hold both the task and map locks,
5481 		 * so convert the task lock into a map reference,
5482 		 * drop the task lock, then lock the map.
5483 		 */
5484 		if (is_kernel_task) {
5485 			map = kernel_map;
5486 			task_unlock(task);
5487 			/* no lock, no reference */
5488 		} else {
5489 			map = task->map;
5490 			vm_map_reference(map);
5491 			task_unlock(task);
5492 			vm_map_lock_read(map);
5493 		}
5494 
5495 		vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5496 		vm_info->region_count = map->hdr.nentries;
5497 		vm_info->page_size = vm_map_page_size(map);
5498 
5499 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5500 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5501 
5502 		vm_info->device = 0;
5503 		vm_info->device_peak = 0;
5504 		ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5505 		ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5506 		ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5507 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5508 		ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5509 		ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5510 		ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5511 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5512 		ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5513 
5514 		vm_info->purgeable_volatile_pmap = 0;
5515 		vm_info->purgeable_volatile_resident = 0;
5516 		vm_info->purgeable_volatile_virtual = 0;
5517 		if (is_kernel_task) {
5518 			/*
5519 			 * We do not maintain the detailed stats for the
5520 			 * kernel_pmap, so just count everything as
5521 			 * "internal"...
5522 			 */
5523 			vm_info->internal = vm_info->resident_size;
5524 			/*
5525 			 * ... but since the memory held by the VM compressor
5526 			 * in the kernel address space ought to be attributed
5527 			 * to user-space tasks, we subtract it from "internal"
5528 			 * to give memory reporting tools a more accurate idea
5529 			 * of what the kernel itself is actually using, instead
5530 			 * of making it look like the kernel is leaking memory
5531 			 * when the system is under memory pressure.
5532 			 */
5533 			vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5534 			    PAGE_SIZE);
5535 		} else {
5536 			mach_vm_size_t  volatile_virtual_size;
5537 			mach_vm_size_t  volatile_resident_size;
5538 			mach_vm_size_t  volatile_compressed_size;
5539 			mach_vm_size_t  volatile_pmap_size;
5540 			mach_vm_size_t  volatile_compressed_pmap_size;
5541 			kern_return_t   kr;
5542 
5543 			if (flavor == TASK_VM_INFO_PURGEABLE) {
5544 				kr = vm_map_query_volatile(
5545 					map,
5546 					&volatile_virtual_size,
5547 					&volatile_resident_size,
5548 					&volatile_compressed_size,
5549 					&volatile_pmap_size,
5550 					&volatile_compressed_pmap_size);
5551 				if (kr == KERN_SUCCESS) {
5552 					vm_info->purgeable_volatile_pmap =
5553 					    volatile_pmap_size;
5554 					if (radar_20146450) {
5555 						vm_info->compressed -=
5556 						    volatile_compressed_pmap_size;
5557 					}
5558 					vm_info->purgeable_volatile_resident =
5559 					    volatile_resident_size;
5560 					vm_info->purgeable_volatile_virtual =
5561 					    volatile_virtual_size;
5562 				}
5563 			}
5564 		}
5565 		*task_info_count = TASK_VM_INFO_REV0_COUNT;
5566 
5567 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5568 			/* must be captured while we still have the map lock */
5569 			vm_info->min_address = map->min_offset;
5570 			vm_info->max_address = map->max_offset;
5571 		}
5572 
5573 		/*
5574 		 * Done with vm map things, can drop the map lock and reference,
5575 		 * and take the task lock back.
5576 		 *
5577 		 * Re-validate that the task didn't die on us.
5578 		 */
5579 		if (!is_kernel_task) {
5580 			vm_map_unlock_read(map);
5581 			vm_map_deallocate(map);
5582 		}
5583 		map = VM_MAP_NULL;
5584 
5585 		task_lock(task);
5586 
5587 		if ((task != current_task()) && (!task->active)) {
5588 			error = KERN_INVALID_ARGUMENT;
5589 			break;
5590 		}
5591 
5592 		if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5593 			vm_info->phys_footprint =
5594 			    (mach_vm_size_t) get_task_phys_footprint(task);
5595 			*task_info_count = TASK_VM_INFO_REV1_COUNT;
5596 		}
5597 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5598 			/* data was captured above */
5599 			*task_info_count = TASK_VM_INFO_REV2_COUNT;
5600 		}
5601 
5602 		if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5603 			ledger_get_lifetime_max(task->ledger,
5604 			    task_ledgers.phys_footprint,
5605 			    &vm_info->ledger_phys_footprint_peak);
5606 			ledger_get_balance(task->ledger,
5607 			    task_ledgers.purgeable_nonvolatile,
5608 			    &vm_info->ledger_purgeable_nonvolatile);
5609 			ledger_get_balance(task->ledger,
5610 			    task_ledgers.purgeable_nonvolatile_compressed,
5611 			    &vm_info->ledger_purgeable_novolatile_compressed);
5612 			ledger_get_balance(task->ledger,
5613 			    task_ledgers.purgeable_volatile,
5614 			    &vm_info->ledger_purgeable_volatile);
5615 			ledger_get_balance(task->ledger,
5616 			    task_ledgers.purgeable_volatile_compressed,
5617 			    &vm_info->ledger_purgeable_volatile_compressed);
5618 			ledger_get_balance(task->ledger,
5619 			    task_ledgers.network_nonvolatile,
5620 			    &vm_info->ledger_tag_network_nonvolatile);
5621 			ledger_get_balance(task->ledger,
5622 			    task_ledgers.network_nonvolatile_compressed,
5623 			    &vm_info->ledger_tag_network_nonvolatile_compressed);
5624 			ledger_get_balance(task->ledger,
5625 			    task_ledgers.network_volatile,
5626 			    &vm_info->ledger_tag_network_volatile);
5627 			ledger_get_balance(task->ledger,
5628 			    task_ledgers.network_volatile_compressed,
5629 			    &vm_info->ledger_tag_network_volatile_compressed);
5630 			ledger_get_balance(task->ledger,
5631 			    task_ledgers.media_footprint,
5632 			    &vm_info->ledger_tag_media_footprint);
5633 			ledger_get_balance(task->ledger,
5634 			    task_ledgers.media_footprint_compressed,
5635 			    &vm_info->ledger_tag_media_footprint_compressed);
5636 			ledger_get_balance(task->ledger,
5637 			    task_ledgers.media_nofootprint,
5638 			    &vm_info->ledger_tag_media_nofootprint);
5639 			ledger_get_balance(task->ledger,
5640 			    task_ledgers.media_nofootprint_compressed,
5641 			    &vm_info->ledger_tag_media_nofootprint_compressed);
5642 			ledger_get_balance(task->ledger,
5643 			    task_ledgers.graphics_footprint,
5644 			    &vm_info->ledger_tag_graphics_footprint);
5645 			ledger_get_balance(task->ledger,
5646 			    task_ledgers.graphics_footprint_compressed,
5647 			    &vm_info->ledger_tag_graphics_footprint_compressed);
5648 			ledger_get_balance(task->ledger,
5649 			    task_ledgers.graphics_nofootprint,
5650 			    &vm_info->ledger_tag_graphics_nofootprint);
5651 			ledger_get_balance(task->ledger,
5652 			    task_ledgers.graphics_nofootprint_compressed,
5653 			    &vm_info->ledger_tag_graphics_nofootprint_compressed);
5654 			ledger_get_balance(task->ledger,
5655 			    task_ledgers.neural_footprint,
5656 			    &vm_info->ledger_tag_neural_footprint);
5657 			ledger_get_balance(task->ledger,
5658 			    task_ledgers.neural_footprint_compressed,
5659 			    &vm_info->ledger_tag_neural_footprint_compressed);
5660 			ledger_get_balance(task->ledger,
5661 			    task_ledgers.neural_nofootprint,
5662 			    &vm_info->ledger_tag_neural_nofootprint);
5663 			ledger_get_balance(task->ledger,
5664 			    task_ledgers.neural_nofootprint_compressed,
5665 			    &vm_info->ledger_tag_neural_nofootprint_compressed);
5666 			*task_info_count = TASK_VM_INFO_REV3_COUNT;
5667 		}
5668 		if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5669 			if (get_bsdtask_info(task)) {
5670 				vm_info->limit_bytes_remaining =
5671 				    memorystatus_available_memory_internal(get_bsdtask_info(task));
5672 			} else {
5673 				vm_info->limit_bytes_remaining = 0;
5674 			}
5675 			*task_info_count = TASK_VM_INFO_REV4_COUNT;
5676 		}
5677 		if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5678 			thread_t thread;
5679 			uint64_t total = task->decompressions;
5680 			queue_iterate(&task->threads, thread, thread_t, task_threads) {
5681 				total += thread->decompressions;
5682 			}
5683 			vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
5684 			*task_info_count = TASK_VM_INFO_REV5_COUNT;
5685 		}
5686 		if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
5687 			ledger_get_balance(task->ledger, task_ledgers.swapins,
5688 			    &vm_info->ledger_swapins);
5689 			*task_info_count = TASK_VM_INFO_REV6_COUNT;
5690 		}
5691 
5692 		break;
5693 	}
5694 
5695 	case TASK_WAIT_STATE_INFO:
5696 	{
5697 		/*
5698 		 * Deprecated flavor. Currently allowing some results until all users
5699 		 * stop calling it. The results may not be accurate.
5700 		 */
5701 		task_wait_state_info_t  wait_state_info;
5702 		uint64_t total_sfi_ledger_val = 0;
5703 
5704 		if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5705 			error = KERN_INVALID_ARGUMENT;
5706 			break;
5707 		}
5708 
5709 		wait_state_info = (task_wait_state_info_t) task_info_out;
5710 
5711 		wait_state_info->total_wait_state_time = 0;
5712 		bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5713 
5714 #if CONFIG_SCHED_SFI
5715 		int i, prev_lentry = -1;
5716 		int64_t  val_credit, val_debit;
5717 
5718 		for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5719 			val_credit = 0;
5720 			/*
5721 			 * checking with prev_lentry != entry ensures adjacent classes
5722 			 * which share the same ledger do not add wait times twice.
5723 			 * Note: Use ledger() call to get data for each individual sfi class.
5724 			 */
5725 			if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5726 			    KERN_SUCCESS == ledger_get_entries(task->ledger,
5727 			    task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5728 				total_sfi_ledger_val += val_credit;
5729 			}
5730 			prev_lentry = task_ledgers.sfi_wait_times[i];
5731 		}
5732 
5733 #endif /* CONFIG_SCHED_SFI */
5734 		wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5735 		*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5736 
5737 		break;
5738 	}
5739 	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5740 	{
5741 #if DEVELOPMENT || DEBUG
5742 		pvm_account_info_t      acnt_info;
5743 
5744 		if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5745 			error = KERN_INVALID_ARGUMENT;
5746 			break;
5747 		}
5748 
5749 		if (task_info_out == NULL) {
5750 			error = KERN_INVALID_ARGUMENT;
5751 			break;
5752 		}
5753 
5754 		acnt_info = (pvm_account_info_t) task_info_out;
5755 
5756 		error = vm_purgeable_account(task, acnt_info);
5757 
5758 		*task_info_count = PVM_ACCOUNT_INFO_COUNT;
5759 
5760 		break;
5761 #else /* DEVELOPMENT || DEBUG */
5762 		error = KERN_NOT_SUPPORTED;
5763 		break;
5764 #endif /* DEVELOPMENT || DEBUG */
5765 	}
5766 	case TASK_FLAGS_INFO:
5767 	{
5768 		task_flags_info_t               flags_info;
5769 
5770 		if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5771 			error = KERN_INVALID_ARGUMENT;
5772 			break;
5773 		}
5774 
5775 		flags_info = (task_flags_info_t)task_info_out;
5776 
5777 		/* only publish the 64-bit flag of the task */
5778 		flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5779 
5780 		*task_info_count = TASK_FLAGS_INFO_COUNT;
5781 		break;
5782 	}
5783 
5784 	case TASK_DEBUG_INFO_INTERNAL:
5785 	{
5786 #if DEVELOPMENT || DEBUG
5787 		task_debug_info_internal_t dbg_info;
5788 		ipc_space_t space = task->itk_space;
5789 		if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5790 			error = KERN_NOT_SUPPORTED;
5791 			break;
5792 		}
5793 
5794 		if (task_info_out == NULL) {
5795 			error = KERN_INVALID_ARGUMENT;
5796 			break;
5797 		}
5798 		dbg_info = (task_debug_info_internal_t) task_info_out;
5799 		dbg_info->ipc_space_size = 0;
5800 
5801 		if (space) {
5802 			smr_global_enter();
5803 			ipc_entry_table_t table = smr_entered_load(&space->is_table);
5804 			if (table) {
5805 				dbg_info->ipc_space_size =
5806 				    ipc_entry_table_count(table);
5807 			}
5808 			smr_global_leave();
5809 		}
5810 
5811 		dbg_info->suspend_count = task->suspend_count;
5812 
5813 		error = KERN_SUCCESS;
5814 		*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5815 		break;
5816 #else /* DEVELOPMENT || DEBUG */
5817 		error = KERN_NOT_SUPPORTED;
5818 		break;
5819 #endif /* DEVELOPMENT || DEBUG */
5820 	}
5821 	default:
5822 		error = KERN_INVALID_ARGUMENT;
5823 	}
5824 
5825 	task_unlock(task);
5826 	return error;
5827 }
5828 
5829 /*
5830  * task_info_from_user
5831  *
5832  * When calling task_info from user space,
5833  * this function will be executed as mig server side
5834  * instead of calling directly into task_info.
5835  * This gives the possibility to perform more security
5836  * checks on task_port.
5837  *
5838  * In the case of TASK_DYLD_INFO, we require the more
 * privileged task_read_port, not the less-privileged task_name_port.
5840  *
5841  */
5842 kern_return_t
task_info_from_user(mach_port_t task_port,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)5843 task_info_from_user(
5844 	mach_port_t             task_port,
5845 	task_flavor_t           flavor,
5846 	task_info_t             task_info_out,
5847 	mach_msg_type_number_t  *task_info_count)
5848 {
5849 	task_t task;
5850 	kern_return_t ret;
5851 
5852 	if (flavor == TASK_DYLD_INFO) {
5853 		task = convert_port_to_task_read(task_port);
5854 	} else {
5855 		task = convert_port_to_task_name(task_port);
5856 	}
5857 
5858 	ret = task_info(task, flavor, task_info_out, task_info_count);
5859 
5860 	task_deallocate(task);
5861 
5862 	return ret;
5863 }
5864 
5865 /*
5866  * Routine: task_dyld_process_info_update_helper
5867  *
5868  * Release send rights in release_ports.
5869  *
5870  * If no active ports found in task's dyld notifier array, unset the magic value
5871  * in user space to indicate so.
5872  *
5873  * Condition:
5874  *      task's itk_lock is locked, and is unlocked upon return.
5875  *      Global g_dyldinfo_mtx is locked, and is unlocked upon return.
5876  */
void
task_dyld_process_info_update_helper(
	task_t                  task,
	size_t                  active_count,
	vm_map_address_t        magic_addr,    /* a userspace address */
	ipc_port_t             *release_ports,
	size_t                  release_count)
{
	void *notifiers_ptr = NULL;

	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);

	if (active_count == 0) {
		/*
		 * No live notifier ports remain: tear down the array and
		 * clear the userspace magic so dyld knows nobody is listening.
		 */
		assert(task->itk_dyld_notify != NULL);
		notifiers_ptr = task->itk_dyld_notify;
		task->itk_dyld_notify = NULL;
		itk_unlock(task);

		kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
		(void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
	} else {
		/* At least one notifier is still registered: keep the magic set. */
		itk_unlock(task);
		(void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
		    magic_addr);     /* reset magic */
	}

	lck_mtx_unlock(&g_dyldinfo_mtx);

	/* Both locks dropped: now safe to release the collected send rights. */
	for (size_t i = 0; i < release_count; i++) {
		ipc_port_release_send(release_ports[i]);
	}
}
5909 
5910 /*
5911  * Routine: task_dyld_process_info_notify_register
5912  *
5913  * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
5914  * memory for the array if it's the first port to be registered. Also cleanup
5915  * any dead rights found in the array.
5916  *
5917  * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
5918  *
5919  * Args:
5920  *     task:   Target task for the registration.
5921  *     sright: A send right.
5922  *
5923  * Returns:
5924  *     KERN_SUCCESS: Registration succeeded.
5925  *     KERN_INVALID_TASK: task is invalid.
5926  *     KERN_INVALID_RIGHT: sright is invalid.
5927  *     KERN_DENIED: Security policy denied this call.
5928  *     KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
5929  *     KERN_NO_SPACE: No available notifier port slot left for this task.
5930  *     KERN_RIGHT_EXISTS: The notifier port is already registered and active.
5931  *
5932  *     Other error code see task_info().
5933  *
5934  * See Also:
5935  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
5936  */
kern_return_t
task_dyld_process_info_notify_register(
	task_t                  task,
	ipc_port_t              sright)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	mach_vm_address_t ports_addr; /* a user space address */
	kern_return_t kr;
	boolean_t right_exists = false;
	ipc_port_t *notifiers_ptr = NULL;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!IP_VALID(sright)) {
		return KERN_INVALID_RIGHT;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/*
	 * Compute the userspace address of the notifyMachPorts magic slot
	 * in the task's (32- or 64-bit) dyld all_image_infos structure.
	 */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/*
	 * Speculatively allocate the notifier array before taking any locks
	 * (the allocation may block); it is discarded below if another thread
	 * installed one in the meantime.
	 */
	if (task->itk_dyld_notify == NULL) {
		notifiers_ptr = kalloc_type(ipc_port_t,
		    DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	}

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	/* Install our array only if nobody else beat us to it. */
	if (task->itk_dyld_notify == NULL) {
		task->itk_dyld_notify = notifiers_ptr;
		notifiers_ptr = NULL;
	}

	assert(task->itk_dyld_notify != NULL);
	/* First pass: clear dead names and check for duplicate registration */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		} else if (*portp == sright) {
			/* the port is already registered and is active */
			right_exists = true;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	if (right_exists) {
		/* skip second pass */
		kr = KERN_RIGHT_EXISTS;
		goto out;
	}

	/* Second pass: register the port */
	kr = KERN_NO_SPACE;
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == IPC_PORT_NULL) {
			*portp = sright;
			active_count++;
			kr = KERN_SUCCESS;
			break;
		}
	}

out:
	assert(active_count > 0);

	/* Drops both locks and releases the dead rights gathered above. */
	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Free the speculative array if it was not installed (NULL is tolerated). */
	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);

	return kr;
}
6039 
6040 /*
6041  * Routine: task_dyld_process_info_notify_deregister
6042  *
6043  * Remove a send right in target task's itk_dyld_notify array matching the receive
6044  * right name passed in. Deallocate kernel memory for the array if it's the last port to
6045  * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
6046  *
6047  * Does not consume any reference.
6048  *
6049  * Args:
6050  *     task: Target task for the deregistration.
6051  *     rcv_name: The name denoting the receive right in caller's space.
6052  *
6053  * Returns:
 *     KERN_SUCCESS: A matching entry was found and deregistration succeeded.
6055  *     KERN_INVALID_TASK: task is invalid.
6056  *     KERN_INVALID_NAME: name is invalid.
6057  *     KERN_DENIED: Security policy denied this call.
6058  *     KERN_FAILURE: A matching entry is not found.
6059  *     KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6060  *
6061  *     Other error code see task_info().
6062  *
6063  * See Also:
6064  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6065  */
kern_return_t
task_dyld_process_info_notify_deregister(
	task_t                  task,
	mach_port_name_t        rcv_name)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	boolean_t port_found = false;
	mach_vm_address_t ports_addr; /* a user space address */
	ipc_port_t sright;
	kern_return_t kr;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!MACH_PORT_VALID(rcv_name)) {
		return KERN_INVALID_NAME;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/*
	 * Compute the userspace address of the notifyMachPorts magic slot
	 * in the task's (32- or 64-bit) dyld all_image_infos structure.
	 */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/* Resolve the caller's receive-right name to the underlying port. */
	kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
	if (kr) {
		return KERN_INVALID_RIGHT;
	}

	/* Take a reference so the port outlives the port lock we drop here. */
	ip_reference(sright);
	ip_mq_unlock(sright);

	assert(sright != IPC_PORT_NULL);

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		/* No notifier array: nothing was registered (or already torn down). */
		itk_unlock(task);
		lck_mtx_unlock(&g_dyldinfo_mtx);
		ip_release(sright);
		return KERN_FAILURE;
	}

	/* Single pass: remove the matching entry and prune any dead rights. */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == sright) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
			port_found = true;
		} else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	/* Drops both locks and releases the rights collected above. */
	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	ip_release(sright);

	return port_found ? KERN_SUCCESS : KERN_FAILURE;
}
6152 
6153 /*
6154  *	task_power_info
6155  *
6156  *	Returns power stats for the task.
6157  *	Note: Called with task locked.
6158  */
void
task_power_info_locked(
	task_t                        task,
	task_power_info_t             info,          /* required, always filled */
	gpu_energy_data_t             ginfo,         /* optional, may be NULL */
	task_power_info_v2_t          infov2,        /* optional, may be NULL */
	struct task_power_info_extra *extra_info)    /* optional, may be NULL */
{
	thread_t                thread;
	ledger_amount_t         tmp;

	uint64_t                runnable_time_sum = 0;

	task_lock_assert_owned(task);

	/* Wakeup totals are maintained in the task ledgers. */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	/* Seed with the task-level counters; per-thread values are added below. */
	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	struct recount_usage usage = { 0 };
	struct recount_usage usage_perf = { 0 };
	recount_task_usage_perf_only(task, &usage, &usage_perf);

	info->total_user = usage.ru_user_time_mach;
	info->total_system = usage.ru_system_time_mach;
	runnable_time_sum = task->total_runnable_time;

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		/* "ptime" is time accrued on performance cores only. */
		infov2->task_ptime = usage_perf.ru_system_time_mach +
		    usage_perf.ru_user_time_mach;
		infov2->task_pset_switches = task->ps_switch;
#if CONFIG_PERVASIVE_ENERGY
		infov2->task_energy = usage.ru_energy_nj;
#endif /* CONFIG_PERVASIVE_ENERGY */
	}

	/* Accumulate per-thread contributions from live, non-idle threads. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;

		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

		if (infov2) {
			infov2->task_pset_switches += thread->ps_switch;
		}

		runnable_time_sum += timer_grab(&thread->runnable_timer);

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}

	if (extra_info) {
		extra_info->runnable_time = runnable_time_sum;
#if CONFIG_PERVASIVE_CPI
		extra_info->cycles = usage.ru_cycles;
		extra_info->instructions = usage.ru_instructions;
		extra_info->pcycles = usage_perf.ru_cycles;
		extra_info->pinstructions = usage_perf.ru_instructions;
		extra_info->user_ptime = usage_perf.ru_user_time_mach;
		extra_info->system_ptime = usage_perf.ru_system_time_mach;
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
		extra_info->energy = usage.ru_energy_nj;
		extra_info->penergy = usage_perf.ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	}
}
6245 
6246 /*
6247  *	task_gpu_utilisation
6248  *
 *	Returns the total GPU time used by all the threads of the task
 *	(both dead and alive).
6251  */
6252 uint64_t
task_gpu_utilisation(task_t task)6253 task_gpu_utilisation(
6254 	task_t  task)
6255 {
6256 	uint64_t gpu_time = 0;
6257 #if defined(__x86_64__)
6258 	thread_t thread;
6259 
6260 	task_lock(task);
6261 	gpu_time += task->task_gpu_ns;
6262 
6263 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6264 		spl_t x;
6265 		x = splsched();
6266 		thread_lock(thread);
6267 		gpu_time += ml_gpu_stat(thread);
6268 		thread_unlock(thread);
6269 		splx(x);
6270 	}
6271 
6272 	task_unlock(task);
6273 #else /* defined(__x86_64__) */
6274 	/* silence compiler warning */
6275 	(void)task;
6276 #endif /* defined(__x86_64__) */
6277 	return gpu_time;
6278 }
6279 
6280 /* This function updates the cpu time in the arrays for each
6281  * effective and requested QoS class
6282  */
6283 void
task_update_cpu_time_qos_stats(task_t task,uint64_t * eqos_stats,uint64_t * rqos_stats)6284 task_update_cpu_time_qos_stats(
6285 	task_t  task,
6286 	uint64_t *eqos_stats,
6287 	uint64_t *rqos_stats)
6288 {
6289 	if (!eqos_stats && !rqos_stats) {
6290 		return;
6291 	}
6292 
6293 	task_lock(task);
6294 	thread_t thread;
6295 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6296 		if (thread->options & TH_OPT_IDLE_THREAD) {
6297 			continue;
6298 		}
6299 
6300 		thread_update_qos_cpu_time(thread);
6301 	}
6302 
6303 	if (eqos_stats) {
6304 		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
6305 		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
6306 		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
6307 		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
6308 		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
6309 		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
6310 		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
6311 	}
6312 
6313 	if (rqos_stats) {
6314 		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
6315 		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
6316 		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
6317 		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
6318 		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
6319 		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
6320 		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
6321 	}
6322 
6323 	task_unlock(task);
6324 }
6325 
6326 kern_return_t
task_purgable_info(task_t task,task_purgable_info_t * stats)6327 task_purgable_info(
6328 	task_t                  task,
6329 	task_purgable_info_t    *stats)
6330 {
6331 	if (task == TASK_NULL || stats == NULL) {
6332 		return KERN_INVALID_ARGUMENT;
6333 	}
6334 	/* Take task reference */
6335 	task_reference(task);
6336 	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6337 	/* Drop task reference */
6338 	task_deallocate(task);
6339 	return KERN_SUCCESS;
6340 }
6341 
6342 void
task_vtimer_set(task_t task,integer_t which)6343 task_vtimer_set(
6344 	task_t          task,
6345 	integer_t       which)
6346 {
6347 	thread_t        thread;
6348 	spl_t           x;
6349 
6350 	task_lock(task);
6351 
6352 	task->vtimers |= which;
6353 
6354 	switch (which) {
6355 	case TASK_VTIMER_USER:
6356 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
6357 			x = splsched();
6358 			thread_lock(thread);
6359 			struct recount_times_mach times = recount_thread_times(thread);
6360 			thread->vtimer_user_save = times.rtm_user;
6361 			thread_unlock(thread);
6362 			splx(x);
6363 		}
6364 		break;
6365 
6366 	case TASK_VTIMER_PROF:
6367 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
6368 			x = splsched();
6369 			thread_lock(thread);
6370 			thread->vtimer_prof_save = recount_thread_time_mach(thread);
6371 			thread_unlock(thread);
6372 			splx(x);
6373 		}
6374 		break;
6375 
6376 	case TASK_VTIMER_RLIM:
6377 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
6378 			x = splsched();
6379 			thread_lock(thread);
6380 			thread->vtimer_rlim_save = recount_thread_time_mach(thread);
6381 			thread_unlock(thread);
6382 			splx(x);
6383 		}
6384 		break;
6385 	}
6386 
6387 	task_unlock(task);
6388 }
6389 
6390 void
task_vtimer_clear(task_t task,integer_t which)6391 task_vtimer_clear(
6392 	task_t          task,
6393 	integer_t       which)
6394 {
6395 	task_lock(task);
6396 
6397 	task->vtimers &= ~which;
6398 
6399 	task_unlock(task);
6400 }
6401 
/*
 * task_vtimer_update:
 *
 * Compute the elapsed time (in microseconds, via *microsecs) for the given
 * vtimer type on the current thread since the last update/arm, and advance
 * the saved baseline.  No-op (returns without touching *microsecs) if the
 * requested vtimer bits are not all armed on the task.
 *
 * NOTE(review): the delta is truncated to 32 bits and the seconds part of
 * the conversion is discarded — presumably deltas are assumed to be well
 * under a second between updates; confirm against callers.
 */
void
task_vtimer_update(
	__unused
	task_t          task,
	integer_t       which,
	uint32_t        *microsecs)
{
	thread_t        thread = current_thread();
	uint32_t        tdelt = 0;
	clock_sec_t     secs = 0;
	uint64_t        tsum;

	assert(task == current_task());

	/* Thread lock at splsched protects the vtimer_*_save baselines. */
	spl_t s = splsched();
	thread_lock(thread);

	if ((task->vtimers & which) != (uint32_t)which) {
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {
	case TASK_VTIMER_USER:;
		struct recount_times_mach times = recount_thread_times(thread);
		tsum = times.rtm_user;
		tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
		thread->vtimer_user_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0) {
			thread->vtimer_prof_save = tsum;
		}
		break;

	case TASK_VTIMER_RLIM:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}
6455 
6456 uint64_t
get_task_dispatchqueue_offset(task_t task)6457 get_task_dispatchqueue_offset(
6458 	task_t          task)
6459 {
6460 	return task->dispatchqueue_offset;
6461 }
6462 
/*
 * task_synchronizer_destroy_all:
 *
 * Destroy all synchronizers owned by the task (currently just its
 * semaphores).
 */
void
task_synchronizer_destroy_all(task_t task)
{
	/*
	 *  Destroy owned semaphores
	 */
	semaphore_destroy_all(task);
}
6471 
6472 /*
6473  * Install default (machine-dependent) initial thread state
6474  * on the task.  Subsequent thread creation will have this initial
6475  * state set on the thread by machine_thread_inherit_taskwide().
6476  * Flavors and structures are exactly the same as those to thread_set_state()
6477  */
6478 kern_return_t
task_set_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t state_count)6479 task_set_state(
6480 	task_t task,
6481 	int flavor,
6482 	thread_state_t state,
6483 	mach_msg_type_number_t state_count)
6484 {
6485 	kern_return_t ret;
6486 
6487 	if (task == TASK_NULL) {
6488 		return KERN_INVALID_ARGUMENT;
6489 	}
6490 
6491 	task_lock(task);
6492 
6493 	if (!task->active) {
6494 		task_unlock(task);
6495 		return KERN_FAILURE;
6496 	}
6497 
6498 	ret = machine_task_set_state(task, flavor, state, state_count);
6499 
6500 	task_unlock(task);
6501 	return ret;
6502 }
6503 
6504 /*
6505  * Examine the default (machine-dependent) initial thread state
6506  * on the task, as set by task_set_state().  Flavors and structures
6507  * are exactly the same as those passed to thread_get_state().
6508  */
6509 kern_return_t
task_get_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)6510 task_get_state(
6511 	task_t  task,
6512 	int     flavor,
6513 	thread_state_t state,
6514 	mach_msg_type_number_t *state_count)
6515 {
6516 	kern_return_t ret;
6517 
6518 	if (task == TASK_NULL) {
6519 		return KERN_INVALID_ARGUMENT;
6520 	}
6521 
6522 	task_lock(task);
6523 
6524 	if (!task->active) {
6525 		task_unlock(task);
6526 		return KERN_FAILURE;
6527 	}
6528 
6529 	ret = machine_task_get_state(task, flavor, state, state_count);
6530 
6531 	task_unlock(task);
6532 	return ret;
6533 }
6534 
6535 
/*
 * PROC_VIOLATED_GUARD__SEND_EXC_GUARD:
 *
 * Deliver an EXC_GUARD exception for the current task by enqueueing a
 * corpse-based exception.  The conspicuous function name is deliberate:
 * it is noinline/not_tail_called so it shows up as a distinct frame in
 * backtraces of guard violations.
 */
static kern_return_t __attribute__((noinline, not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	boolean_t backtrace_only)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid()) {
		return KERN_NOT_SUPPORTED;              // initproc is immune
	}
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;
	void *bsd_info = get_bsdtask_info(task);

	/* (See jetsam-related comments below) */

	/* Keep jetsam away from the task while the corpse is collected. */
	proc_memstat_skip(bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason, backtrace_only);
	proc_memstat_skip(bsd_info, FALSE);
	return kr;
}
6563 
/*
 * task_violated_guard:
 *
 * Public entry point for reporting a guard violation on the current task;
 * forwards to the corpse-enqueueing helper above.
 */
kern_return_t
task_violated_guard(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	bool backtrace_only)
{
	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
}
6573 
6574 
6575 #if CONFIG_MEMORYSTATUS
6576 
6577 boolean_t
task_get_memlimit_is_active(task_t task)6578 task_get_memlimit_is_active(task_t task)
6579 {
6580 	assert(task != NULL);
6581 
6582 	if (task->memlimit_is_active == 1) {
6583 		return TRUE;
6584 	} else {
6585 		return FALSE;
6586 	}
6587 }
6588 
6589 void
task_set_memlimit_is_active(task_t task,boolean_t memlimit_is_active)6590 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6591 {
6592 	assert(task != NULL);
6593 
6594 	if (memlimit_is_active) {
6595 		task->memlimit_is_active = 1;
6596 	} else {
6597 		task->memlimit_is_active = 0;
6598 	}
6599 }
6600 
6601 boolean_t
task_get_memlimit_is_fatal(task_t task)6602 task_get_memlimit_is_fatal(task_t task)
6603 {
6604 	assert(task != NULL);
6605 
6606 	if (task->memlimit_is_fatal == 1) {
6607 		return TRUE;
6608 	} else {
6609 		return FALSE;
6610 	}
6611 }
6612 
6613 void
task_set_memlimit_is_fatal(task_t task,boolean_t memlimit_is_fatal)6614 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6615 {
6616 	assert(task != NULL);
6617 
6618 	if (memlimit_is_fatal) {
6619 		task->memlimit_is_fatal = 1;
6620 	} else {
6621 		task->memlimit_is_fatal = 0;
6622 	}
6623 }
6624 
/*
 * Return the task's recorded memstat dirty-start timestamp.
 * Read without the task lock (the setter below takes it).
 */
uint64_t
task_get_dirty_start(task_t task)
{
	return task->memstat_dirty_start;
}
6630 
6631 void
task_set_dirty_start(task_t task,uint64_t start)6632 task_set_dirty_start(task_t task, uint64_t start)
6633 {
6634 	task_lock(task);
6635 	task->memstat_dirty_start = start;
6636 	task_unlock(task);
6637 }
6638 
6639 boolean_t
task_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6640 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6641 {
6642 	boolean_t triggered = FALSE;
6643 
6644 	assert(task == current_task());
6645 
6646 	/*
6647 	 * Returns true, if task has already triggered an exc_resource exception.
6648 	 */
6649 
6650 	if (memlimit_is_active) {
6651 		triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6652 	} else {
6653 		triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6654 	}
6655 
6656 	return triggered;
6657 }
6658 
6659 void
task_mark_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6660 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6661 {
6662 	assert(task == current_task());
6663 
6664 	/*
6665 	 * We allow one exc_resource per process per active/inactive limit.
6666 	 * The limit's fatal attribute does not come into play.
6667 	 */
6668 
6669 	if (memlimit_is_active) {
6670 		task->memlimit_active_exc_resource = 1;
6671 	} else {
6672 		task->memlimit_inactive_exc_resource = 1;
6673 	}
6674 }
6675 
6676 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6677 
/*
 * PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND:
 *
 * Raise an EXC_RESOURCE (RESOURCE_TYPE_MEMORY / FLAVOR_HIGH_WATERMARK) for
 * the current task after it crosses its physical-footprint high watermark.
 * Optionally takes a user core dump first, then either triages the exception
 * synchronously (suspending the task around delivery) or enqueues it via a
 * corpse fork, depending on fatality/boot-args/process preferences.
 * The loud name is intentional: it is noinline so it appears as a distinct
 * frame in backtraces.
 */
void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
{
	task_t                                          task            = current_task();
	int                                                     pid         = 0;
	const char                                      *procname       = "unknown";
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
	boolean_t send_sync_exc_resource = FALSE;
	void *cur_bsd_info = get_bsdtask_info(current_task());

#ifdef MACH_BSD
	pid = proc_selfpid();

	if (pid == 1) {
		/*
		 * Cannot have ReportCrash analyzing
		 * a suspended initproc.
		 */
		return;
	}

	if (cur_bsd_info != NULL) {
		procname = proc_name_address(cur_bsd_info);
		send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(cur_bsd_info);
	}
#endif
#if CONFIG_COREDUMP
	if (hwm_user_cores) {
		int                             error;
		uint64_t                starttime, end;
		clock_sec_t             secs = 0;
		uint32_t                microsecs = 0;

		starttime = mach_absolute_time();
		/*
		 * Trigger a coredump of this process. Don't proceed unless we know we won't
		 * be filling up the disk; and ignore the core size resource limit for this
		 * core file.
		 */
		if ((error = coredump(cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
		}
		/*
		 * coredump() leaves the task suspended.
		 */
		task_resume_internal(current_task());

		end = mach_absolute_time();
		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
		    proc_name_address(cur_bsd_info), pid, (int)secs, microsecs);
	}
#endif /* CONFIG_COREDUMP */

	if (disable_exc_resource) {
		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
		    "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
		return;
	}

	/*
	 * A task that has triggered an EXC_RESOURCE, should not be
	 * jetsammed when the device is under memory pressure.  Here
	 * we set the P_MEMSTAT_SKIP flag so that the process
	 * will be skipped if the memorystatus_thread wakes up.
	 *
	 * This is a debugging aid to ensure we can get a corpse before
	 * the jetsam thread kills the process.
	 * Note that proc_memstat_skip is a no-op on release kernels.
	 */
	proc_memstat_skip(cur_bsd_info, TRUE);

	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);

	/*
	 * Do not generate a corpse fork if the violation is a fatal one
	 * or the process wants synchronous EXC_RESOURCE exceptions.
	 */
	if (is_fatal || send_sync_exc_resource || !exc_via_corpse_forking) {
		/* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
		if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
			/*
			 * Use the _internal_ variant so that no user-space
			 * process can resume our task from under us.
			 */
			task_suspend_internal(task);
			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
			task_resume_internal(task);
		}
	} else {
		if (audio_active) {
			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			    "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
		} else {
			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
			    code, EXCEPTION_CODE_MAX, NULL, FALSE);
		}
	}

	/*
	 * After the EXC_RESOURCE has been handled, we must clear the
	 * P_MEMSTAT_SKIP flag so that the process can again be
	 * considered for jetsam if the memorystatus_thread wakes up.
	 */
	proc_memstat_skip(cur_bsd_info, FALSE);         /* clear the flag */
}
6787 
6788 /*
6789  * Callback invoked when a task exceeds its physical footprint limit.
6790  */
/*
 * task_footprint_exceeded:
 *
 * Ledger callback fired when the current task's phys_footprint ledger
 * warns or exceeds its limit.  Distinguishes warnings from true
 * violations, optionally reclaims deferred memory before acting, raises
 * at most one EXC_RESOURCE per limit type, and always notifies
 * memorystatus.
 *
 * warning: LEDGER_WARNING_DIPPED_BELOW / LEDGER_WARNING_ROSE_ABOVE / 0
 *          (0 == hard limit violation).
 */
void
task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	ledger_amount_t max_footprint, max_footprint_mb;
#if CONFIG_DEFERRED_RECLAIM
	ledger_amount_t current_footprint;
#endif /* CONFIG_DEFERRED_RECLAIM */
	task_t task;
	boolean_t is_warning;
	boolean_t memlimit_is_active;
	boolean_t memlimit_is_fatal;

	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
		/*
		 * Task memory limits only provide a warning on the way up.
		 */
		return;
	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
		/*
		 * This task is in danger of violating a memory limit,
		 * It has exceeded a percentage level of the limit.
		 */
		is_warning = TRUE;
	} else {
		/*
		 * The task has exceeded the physical footprint limit.
		 * This is not a warning but a true limit violation.
		 */
		is_warning = FALSE;
	}

	task = current_task();

	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		/*
		 * Task is enrolled in deferred reclamation.
		 * Do a reclaim to ensure it's really over its limit.
		 */
		vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
		ledger_get_balance(task->ledger, task_ledgers.phys_footprint, &current_footprint);
		if (current_footprint < max_footprint) {
			/* Reclaim brought us back under the limit; nothing to report. */
			return;
		}
	}
#endif /* CONFIG_DEFERRED_RECLAIM */
	max_footprint_mb = max_footprint >> 20;

	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task);

	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * We only generate the exception once per process per memlimit (active/inactive limit).
	 * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
	 * and we disable it by marking that memlimit as exception triggered.
	 */
	if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
		memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
	}

	/* Always let memorystatus react (e.g. jetsam on fatal limits). */
	memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
}
6857 
6858 extern int proc_check_footprint_priv(void);
6859 
6860 kern_return_t
task_set_phys_footprint_limit(task_t task,int new_limit_mb,int * old_limit_mb)6861 task_set_phys_footprint_limit(
6862 	task_t task,
6863 	int new_limit_mb,
6864 	int *old_limit_mb)
6865 {
6866 	kern_return_t error;
6867 
6868 	boolean_t memlimit_is_active;
6869 	boolean_t memlimit_is_fatal;
6870 
6871 	if ((error = proc_check_footprint_priv())) {
6872 		return KERN_NO_ACCESS;
6873 	}
6874 
6875 	/*
6876 	 * This call should probably be obsoleted.
6877 	 * But for now, we default to current state.
6878 	 */
6879 	memlimit_is_active = task_get_memlimit_is_active(task);
6880 	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6881 
6882 	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6883 }
6884 
6885 kern_return_t
task_convert_phys_footprint_limit(int limit_mb,int * converted_limit_mb)6886 task_convert_phys_footprint_limit(
6887 	int limit_mb,
6888 	int *converted_limit_mb)
6889 {
6890 	if (limit_mb == -1) {
6891 		/*
6892 		 * No limit
6893 		 */
6894 		if (max_task_footprint != 0) {
6895 			*converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);         /* bytes to MB */
6896 		} else {
6897 			*converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6898 		}
6899 	} else {
6900 		/* nothing to convert */
6901 		*converted_limit_mb = limit_mb;
6902 	}
6903 	return KERN_SUCCESS;
6904 }
6905 
6906 
/*
 * task_set_phys_footprint_limit_internal:
 *
 * Set the task's phys_footprint ledger limit and its active/fatal
 * attributes.  new_limit_mb == -1 removes the per-task limit (falling
 * back to the global max_task_footprint, if any).  The previous limit
 * (in MB) is returned through old_limit_mb when non-NULL.
 *
 * NOTE: under CONFIG_NOMONITORS this returns early after reporting the
 * old limit, without updating the ledger or the active/fatal flags.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal)
{
	ledger_amount_t old;
	kern_return_t ret;

	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

	if (old_limit_mb) {
		*old_limit_mb = (int)(old >> 20);
	}

	if (new_limit_mb == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
		    max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);

		task_lock(task);
		task_set_memlimit_is_active(task, memlimit_is_active);
		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
		task_unlock(task);

		return KERN_SUCCESS;
	}

#ifdef CONFIG_NOMONITORS
	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

	task_lock(task);

	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
		/*
		 * memlimit state is not changing
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	task_set_memlimit_is_active(task, memlimit_is_active);
	task_set_memlimit_is_fatal(task, memlimit_is_fatal);

	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

	/* Re-evaluate immediately in case we are already over the new limit. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);

	return KERN_SUCCESS;
}
6982 
6983 kern_return_t
task_get_phys_footprint_limit(task_t task,int * limit_mb)6984 task_get_phys_footprint_limit(
6985 	task_t task,
6986 	int *limit_mb)
6987 {
6988 	ledger_amount_t limit;
6989 	kern_return_t ret;
6990 
6991 	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6992 	if (ret != KERN_SUCCESS) {
6993 		return ret;
6994 	}
6995 
6996 	/*
6997 	 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6998 	 * result. There are, however, implicit assumptions that -1 mb limit
6999 	 * equates to LEDGER_LIMIT_INFINITY.
7000 	 */
7001 	assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
7002 	*limit_mb = (int)(limit >> 20);
7003 
7004 	return KERN_SUCCESS;
7005 }
7006 #else /* CONFIG_MEMORYSTATUS */
/* CONFIG_MEMORYSTATUS disabled: footprint limits are unsupported. */
kern_return_t
task_set_phys_footprint_limit(
	__unused task_t task,
	__unused int new_limit_mb,
	__unused int *old_limit_mb)
{
	return KERN_FAILURE;
}
7015 
/* CONFIG_MEMORYSTATUS disabled: footprint limits are unsupported. */
kern_return_t
task_get_phys_footprint_limit(
	__unused task_t task,
	__unused int *limit_mb)
{
	return KERN_FAILURE;
}
7023 #endif /* CONFIG_MEMORYSTATUS */
7024 
/* Return a pointer to the task's security token (lives in read-only memory). */
security_token_t *
task_get_sec_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.sec_token;
}
7030 
/* Update the task's security token via the read-only zone update path. */
void
task_set_sec_token(task_t task, security_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.sec_token, token);
}
7037 
/* Return a pointer to the task's audit token (lives in read-only memory). */
audit_token_t *
task_get_audit_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.audit_token;
}
7043 
/* Update the task's audit token via the read-only zone update path. */
void
task_set_audit_token(task_t task, audit_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.audit_token, token);
}
7050 
7051 void
task_set_tokens(task_t task,security_token_t * sec_token,audit_token_t * audit_token)7052 task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
7053 {
7054 	struct task_token_ro_data tokens;
7055 
7056 	tokens = task_get_ro(task)->task_tokens;
7057 	tokens.sec_token = *sec_token;
7058 	tokens.audit_token = *audit_token;
7059 
7060 	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
7061 	    &tokens);
7062 }
7063 
/* A task is "privileged" when the first word of its security token is 0. */
boolean_t
task_is_privileged(task_t task)
{
	return task_get_sec_token(task)->val[0] == 0;
}
7069 
7070 #ifdef CONFIG_MACF
/* Return the task's Mach trap filter bitmask (may be NULL if unset). */
uint8_t *
task_get_mach_trap_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_trap_filter_mask;
}
7076 
/* Install the task's Mach trap filter bitmask (pointer stored in RO zone). */
void
task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_trap_filter_mask, &mask);
}
7083 
/* Return the task's Mach kobject filter bitmask (may be NULL if unset). */
uint8_t *
task_get_mach_kobj_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_kobj_filter_mask;
}
7089 
/* Return the task's recorded all-image-info address (dyld info). */
mach_vm_address_t
task_get_all_image_info_addr(task_t task)
{
	return task->all_image_info_addr;
}
7095 
/* Install the task's Mach kobject filter bitmask (pointer stored in RO zone). */
void
task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_kobj_filter_mask, &mask);
}
7102 
7103 #endif /* CONFIG_MACF */
7104 
7105 void
task_set_thread_limit(task_t task,uint16_t thread_limit)7106 task_set_thread_limit(task_t task, uint16_t thread_limit)
7107 {
7108 	assert(task != kernel_task);
7109 	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7110 		task_lock(task);
7111 		task->task_thread_limit = thread_limit;
7112 		task_unlock(task);
7113 	}
7114 }
7115 
7116 #if CONFIG_PROC_RESOURCE_LIMITS
/* Forward soft/hard port-space table limits to the task's IPC space. */
kern_return_t
task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
{
	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
}
7122 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7123 
7124 #if XNU_TARGET_OS_OSX
7125 boolean_t
task_has_system_version_compat_enabled(task_t task)7126 task_has_system_version_compat_enabled(task_t task)
7127 {
7128 	boolean_t enabled = FALSE;
7129 
7130 	task_lock(task);
7131 	enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
7132 	task_unlock(task);
7133 
7134 	return enabled;
7135 }
7136 
7137 void
task_set_system_version_compat_enabled(task_t task,boolean_t enable_system_version_compat)7138 task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
7139 {
7140 	assert(task == current_task());
7141 	assert(task != kernel_task);
7142 
7143 	task_lock(task);
7144 	if (enable_system_version_compat) {
7145 		task->t_flags |= TF_SYS_VERSION_COMPAT;
7146 	} else {
7147 		task->t_flags &= ~TF_SYS_VERSION_COMPAT;
7148 	}
7149 	task_unlock(task);
7150 }
7151 #endif /* XNU_TARGET_OS_OSX */
7152 
7153 /*
7154  * We need to export some functions to other components that
7155  * are currently implemented in macros within the osfmk
7156  * component.  Just export them as functions of the same name.
7157  */
7158 boolean_t
is_kerneltask(task_t t)7159 is_kerneltask(task_t t)
7160 {
7161 	if (t == kernel_task) {
7162 		return TRUE;
7163 	}
7164 
7165 	return FALSE;
7166 }
7167 
/* Exported wrapper: TRUE iff the task is a corpse. */
boolean_t
is_corpsetask(task_t t)
{
	return task_is_a_corpse(t);
}
7173 
/* Exported wrapper: TRUE iff the task is a corpse fork. */
boolean_t
is_corpsefork(task_t t)
{
	return task_is_a_corpse_fork(t);
}
7179 
/*
 * current_task_early:
 *
 * Like current_task(), but safe to call before early boot completes:
 * if the current thread has no task read-only data attached yet,
 * return TASK_NULL instead of dereferencing it.
 */
task_t
current_task_early(void)
{
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		if (current_thread()->t_tro == NULL) {
			return TASK_NULL;
		}
	}
	return get_threadtask(current_thread());
}
7190 
/* Return the task of the currently running thread. */
task_t
current_task(void)
{
	return get_threadtask(current_thread());
}
7196 
7197 /* defined in bsd/kern/kern_prot.c */
7198 extern int get_audit_token_pid(audit_token_t *audit_token);
7199 
7200 int
task_pid(task_t task)7201 task_pid(task_t task)
7202 {
7203 	if (task) {
7204 		return get_audit_token_pid(task_get_audit_token(task));
7205 	}
7206 	return -1;
7207 }
7208 
7209 #if __has_feature(ptrauth_calls)
7210 /*
7211  * Get the shared region id and jop signing key for the task.
7212  * The function will allocate a kalloc buffer and return
7213  * it to caller, the caller needs to free it. This is used
7214  * for getting the information via task port.
7215  */
/*
 * task_get_vm_shared_region_id_and_jop_pid:
 *
 * Return a freshly kalloc'd copy of the task's shared region id (caller
 * frees with kfree_data), or NULL if the task has none.  If jop_pid is
 * non-NULL, also look up the region's JOP signing key.
 *
 * The task lock is dropped around the allocation and re-taken, so the id
 * is re-checked afterwards; the assert relies on the id's length never
 * changing while set.
 */
char *
task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
{
	size_t len;
	char *shared_region_id = NULL;

	task_lock(task);
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		return NULL;
	}
	len = strlen(task->shared_region_id) + 1;

	/* don't hold task lock while allocating */
	task_unlock(task);
	shared_region_id = kalloc_data(len, Z_WAITOK);
	task_lock(task);

	/* Re-validate: the id may have been cleared while unlocked. */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		kfree_data(shared_region_id, len);
		return NULL;
	}
	assert(len == strlen(task->shared_region_id) + 1);         /* should never change */
	strlcpy(shared_region_id, task->shared_region_id, len);
	task_unlock(task);

	/* find key from its auth pager */
	if (jop_pid != NULL) {
		*jop_pid = shared_region_find_key(shared_region_id);
	}

	return shared_region_id;
}
7250 
7251 /*
7252  * set the shared region id for a task
7253  */
/*
 * task_set_shared_region_id:
 *
 * Install a new shared region id on the task (taking ownership of `id`)
 * and release any previously installed id and its signing key.  Also
 * clears the auth-remapped flag so the region is re-processed.
 */
void
task_set_shared_region_id(task_t task, char *id)
{
	char *old_id;

	task_lock(task);
	old_id = task->shared_region_id;
	task->shared_region_id = id;
	task->shared_region_auth_remapped = FALSE;
	task_unlock(task);

	/* free any pre-existing shared region id */
	if (old_id != NULL) {
		shared_region_key_dealloc(old_id);
		kfree_data(old_id, strlen(old_id) + 1);
	}
}
7271 #endif /* __has_feature(ptrauth_calls) */
7272 
7273 /*
7274  * This routine finds a thread in a task by its unique id
7275  * Returns a referenced thread or THREAD_NULL if the thread was not found
7276  *
7277  * TODO: This is super inefficient - it's an O(threads in task) list walk!
7278  *       We should make a tid hash, or transition all tid clients to thread ports
7279  *
7280  * Precondition: No locks held (will take task lock)
7281  */
7282 thread_t
task_findtid(task_t task,uint64_t tid)7283 task_findtid(task_t task, uint64_t tid)
7284 {
7285 	thread_t self           = current_thread();
7286 	thread_t found_thread   = THREAD_NULL;
7287 	thread_t iter_thread    = THREAD_NULL;
7288 
7289 	/* Short-circuit the lookup if we're looking up ourselves */
7290 	if (tid == self->thread_id || tid == TID_NULL) {
7291 		assert(get_threadtask(self) == task);
7292 
7293 		thread_reference(self);
7294 
7295 		return self;
7296 	}
7297 
7298 	task_lock(task);
7299 
7300 	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
7301 		if (iter_thread->thread_id == tid) {
7302 			found_thread = iter_thread;
7303 			thread_reference(found_thread);
7304 			break;
7305 		}
7306 	}
7307 
7308 	task_unlock(task);
7309 
7310 	return found_thread;
7311 }
7312 
7313 int
pid_from_task(task_t task)7314 pid_from_task(task_t task)
7315 {
7316 	int pid = -1;
7317 	void *bsd_info = get_bsdtask_info(task);
7318 
7319 	if (bsd_info) {
7320 		pid = proc_pid(bsd_info);
7321 	} else {
7322 		pid = task_pid(task);
7323 	}
7324 
7325 	return pid;
7326 }
7327 
7328 /*
7329  * Control the CPU usage monitor for a task.
7330  */
7331 kern_return_t
task_cpu_usage_monitor_ctl(task_t task,uint32_t * flags)7332 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7333 {
7334 	int error = KERN_SUCCESS;
7335 
7336 	if (*flags & CPUMON_MAKE_FATAL) {
7337 		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7338 	} else {
7339 		error = KERN_INVALID_ARGUMENT;
7340 	}
7341 
7342 	return error;
7343 }
7344 
7345 /*
7346  * Control the wakeups monitor for a task.
7347  */
/*
 * task_wakeups_monitor_ctl:
 *
 * Query or configure the interrupt-wakeups monitor for a task via the
 * interrupt_wakeups ledger entry.
 *
 * flags (in/out):
 *   WAKEMON_GET_PARAMS  - report current enable state/rate; other flags
 *                         are ignored.
 *   WAKEMON_ENABLE      - arm the monitor at *rate_hz wakeups/second
 *                         (WAKEMON_SET_DEFAULTS substitutes the system
 *                         default rate; WAKEMON_MAKE_FATAL latches the
 *                         fatal bit).
 *   WAKEMON_DISABLE     - disarm the monitor and any related telemetry.
 */
kern_return_t
task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & WAKEMON_GET_PARAMS) {
		ledger_amount_t limit;
		uint64_t                period;

		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);

		if (limit != LEDGER_LIMIT_INFINITY) {
			/*
			 * An active limit means the wakeups monitor is enabled.
			 */
			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
			*flags = WAKEMON_ENABLE;
			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
				*flags |= WAKEMON_MAKE_FATAL;
			}
		} else {
			*flags = WAKEMON_DISABLE;
			*rate_hz = -1;
		}

		/*
		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	if (*flags & WAKEMON_ENABLE) {
		if (*flags & WAKEMON_SET_DEFAULTS) {
			*rate_hz = task_wakeups_monitor_rate;
		}

#ifndef CONFIG_NOMONITORS
		if (*flags & WAKEMON_MAKE_FATAL) {
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
		}
#endif /* CONFIG_NOMONITORS */

		/* Validate the rate after defaults have been applied. */
		if (*rate_hz <= 0) {
			task_unlock(task);
			return KERN_INVALID_ARGUMENT;
		}

#ifndef CONFIG_NOMONITORS
		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
		    (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
#endif /* CONFIG_NOMONITORS */
	} else if (*flags & WAKEMON_DISABLE) {
		/*
		 * Caller wishes to disable wakeups monitor on the task.
		 *
		 * Disable telemetry if it was triggered by the wakeups monitor, and
		 * remove the limit & callback on the wakeups ledger entry.
		 */
#if CONFIG_TELEMETRY
		telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
#endif
		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
7421 
7422 void
task_wakeups_rate_exceeded(int warning,__unused const void * param0,__unused const void * param1)7423 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
7424 {
7425 	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7426 #if CONFIG_TELEMETRY
7427 		/*
7428 		 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
7429 		 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
7430 		 */
7431 		telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
7432 #endif
7433 		return;
7434 	}
7435 
7436 #if CONFIG_TELEMETRY
7437 	/*
7438 	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
7439 	 * exceeded the limit, turn telemetry off for the task.
7440 	 */
7441 	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
7442 #endif
7443 
7444 	if (warning == 0) {
7445 		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
7446 	}
7447 }
7448 
7449 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)7450 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
7451 {
7452 	task_t                      task        = current_task();
7453 	int                         pid         = 0;
7454 	const char                  *procname   = "unknown";
7455 	boolean_t                   fatal;
7456 	kern_return_t               kr;
7457 #ifdef EXC_RESOURCE_MONITORS
7458 	mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
7459 #endif /* EXC_RESOURCE_MONITORS */
7460 	struct ledger_entry_info    lei;
7461 
7462 #ifdef MACH_BSD
7463 	pid = proc_selfpid();
7464 	if (get_bsdtask_info(task) != NULL) {
7465 		procname = proc_name_address(get_bsdtask_info(current_task()));
7466 	}
7467 #endif
7468 
7469 	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
7470 
7471 	/*
7472 	 * Disable the exception notification so we don't overwhelm
7473 	 * the listener with an endless stream of redundant exceptions.
7474 	 * TODO: detect whether another thread is already reporting the violation.
7475 	 */
7476 	uint32_t flags = WAKEMON_DISABLE;
7477 	task_wakeups_monitor_ctl(task, &flags, NULL);
7478 
7479 	fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7480 	trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
7481 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
7482 	    "over ~%llu seconds, averaging %llu wakes / second and "
7483 	    "violating a %slimit of %llu wakes over %llu seconds.\n",
7484 	    procname, pid,
7485 	    lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
7486 	    lei.lei_last_refill == 0 ? 0 :
7487 	    (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
7488 	    fatal ? "FATAL " : "",
7489 	    lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
7490 
7491 	kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
7492 	    fatal ? kRNFatalLimitFlag : 0);
7493 	if (kr) {
7494 		printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
7495 	}
7496 
7497 #ifdef EXC_RESOURCE_MONITORS
7498 	if (disable_exc_resource) {
7499 		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7500 		    "supressed by a boot-arg\n", procname, pid);
7501 		return;
7502 	}
7503 	if (audio_active) {
7504 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7505 		    "supressed due to audio playback\n", procname, pid);
7506 		return;
7507 	}
7508 	if (lei.lei_last_refill == 0) {
7509 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7510 		    "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
7511 	}
7512 
7513 	code[0] = code[1] = 0;
7514 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
7515 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
7516 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
7517 	    NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
7518 	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
7519 	    lei.lei_last_refill);
7520 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
7521 	    NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
7522 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7523 #endif /* EXC_RESOURCE_MONITORS */
7524 
7525 	if (fatal) {
7526 		task_terminate_internal(task);
7527 	}
7528 }
7529 
7530 static boolean_t
global_update_logical_writes(int64_t io_delta,int64_t * global_write_count)7531 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
7532 {
7533 	int64_t old_count, new_count;
7534 	boolean_t needs_telemetry;
7535 
7536 	do {
7537 		new_count = old_count = *global_write_count;
7538 		new_count += io_delta;
7539 		if (new_count >= io_telemetry_limit) {
7540 			new_count = 0;
7541 			needs_telemetry = TRUE;
7542 		} else {
7543 			needs_telemetry = FALSE;
7544 		}
7545 	} while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
7546 	return needs_telemetry;
7547 }
7548 
7549 void
task_update_physical_writes(__unused task_t task,__unused task_physical_write_flavor_t flavor,__unused uint64_t io_size,__unused task_balance_flags_t flags)7550 task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
7551 {
7552 #if CONFIG_PHYS_WRITE_ACCT
7553 	if (!io_size) {
7554 		return;
7555 	}
7556 
7557 	/*
7558 	 * task == NULL means that we have to update kernel_task ledgers
7559 	 */
7560 	if (!task) {
7561 		task = kernel_task;
7562 	}
7563 
7564 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
7565 	    task_pid(task), flavor, io_size, flags, 0);
7566 	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);
7567 
7568 	if (flags & TASK_BALANCE_CREDIT) {
7569 		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7570 			OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7571 			ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7572 		}
7573 	} else if (flags & TASK_BALANCE_DEBIT) {
7574 		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7575 			OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7576 			ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7577 		}
7578 	}
7579 #endif /* CONFIG_PHYS_WRITE_ACCT */
7580 }
7581 
7582 void
task_update_logical_writes(task_t task,uint32_t io_size,int flags,void * vp)7583 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
7584 {
7585 	int64_t io_delta = 0;
7586 	int64_t * global_counter_to_update;
7587 	boolean_t needs_telemetry = FALSE;
7588 	boolean_t is_external_device = FALSE;
7589 	int ledger_to_update = 0;
7590 	struct task_writes_counters * writes_counters_to_update;
7591 
7592 	if ((!task) || (!io_size) || (!vp)) {
7593 		return;
7594 	}
7595 
7596 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
7597 	    task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
7598 	DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
7599 
7600 	// Is the drive backing this vnode internal or external to the system?
7601 	if (vnode_isonexternalstorage(vp) == false) {
7602 		global_counter_to_update = &global_logical_writes_count;
7603 		ledger_to_update = task_ledgers.logical_writes;
7604 		writes_counters_to_update = &task->task_writes_counters_internal;
7605 		is_external_device = FALSE;
7606 	} else {
7607 		global_counter_to_update = &global_logical_writes_to_external_count;
7608 		ledger_to_update = task_ledgers.logical_writes_to_external;
7609 		writes_counters_to_update = &task->task_writes_counters_external;
7610 		is_external_device = TRUE;
7611 	}
7612 
7613 	switch (flags) {
7614 	case TASK_WRITE_IMMEDIATE:
7615 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
7616 		ledger_credit(task->ledger, ledger_to_update, io_size);
7617 		if (!is_external_device) {
7618 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7619 		}
7620 		break;
7621 	case TASK_WRITE_DEFERRED:
7622 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
7623 		ledger_credit(task->ledger, ledger_to_update, io_size);
7624 		if (!is_external_device) {
7625 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7626 		}
7627 		break;
7628 	case TASK_WRITE_INVALIDATED:
7629 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
7630 		ledger_debit(task->ledger, ledger_to_update, io_size);
7631 		if (!is_external_device) {
7632 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
7633 		}
7634 		break;
7635 	case TASK_WRITE_METADATA:
7636 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
7637 		ledger_credit(task->ledger, ledger_to_update, io_size);
7638 		if (!is_external_device) {
7639 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7640 		}
7641 		break;
7642 	}
7643 
7644 	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
7645 	if (io_telemetry_limit != 0) {
7646 		/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
7647 		needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
7648 		if (needs_telemetry && !is_external_device) {
7649 			act_set_io_telemetry_ast(current_thread());
7650 		}
7651 	}
7652 }
7653 
7654 /*
7655  * Control the I/O monitor for a task.
7656  */
/*
 * Control the I/O monitor for a task.
 *
 * IOMON_ENABLE arms the physical-writes ledger with the boot-time limit
 * (task_iomon_limit_mb) refilled every task_iomon_interval_secs;
 * IOMON_DISABLE removes the refill and callback so no further
 * violations fire.  If both bits are set, IOMON_ENABLE wins; unknown
 * flags are a silent no-op.  Always returns KERN_SUCCESS.
 */
kern_return_t
task_io_monitor_ctl(task_t task, uint32_t *flags)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & IOMON_ENABLE) {
		/* Configure the physical I/O ledger */
		ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
		ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
	} else if (*flags & IOMON_DISABLE) {
		/*
		 * Caller wishes to disable I/O monitor on the task.
		 */
		ledger_disable_refill(ledger, task_ledgers.physical_writes);
		ledger_disable_callback(ledger, task_ledgers.physical_writes);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
7678 
7679 void
task_io_rate_exceeded(int warning,const void * param0,__unused const void * param1)7680 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
7681 {
7682 	if (warning == 0) {
7683 		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
7684 	}
7685 }
7686 
/*
 * Report an I/O-monitor violation by the current task: snapshot the
 * ledger, disable the monitor to avoid a notification storm, log,
 * notify the resource daemon, and raise EXC_RESOURCE.
 *
 * Note: only FLAVOR_IO_PHYSICAL_WRITES populates lei; for any other
 * flavor (including FLAVOR_IO_LOGICAL_WRITES) lei stays zero-filled,
 * so the logged/encoded balance, limit, and period all read as 0.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
{
	int                             pid = 0;
	task_t                          task = current_task();
#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info        lei = {};
	kern_return_t                   kr;

#ifdef MACH_BSD
	pid = proc_selfpid();
#endif
	/*
	 * Get the ledger entry info. We need to do this before disabling the exception
	 * to get correct values for all fields.
	 */
	switch (flavor) {
	case FLAVOR_IO_PHYSICAL_WRITES:
		ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
		break;
	}


	/*
	 * Disable the exception notification so we don't overwhelm
	 * the listener with an endless stream of redundant exceptions.
	 * TODO: detect whether another thread is already reporting the violation.
	 */
	uint32_t flags = IOMON_DISABLE;
	task_io_monitor_ctl(task, &flags);

	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
	}
	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
	    pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));

	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
	if (kr) {
		printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
	EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
#endif /* EXC_RESOURCE_MONITORS */
}
7741 
7742 void
task_port_space_ast(__unused task_t task)7743 task_port_space_ast(__unused task_t task)
7744 {
7745 	uint32_t current_size, soft_limit, hard_limit;
7746 	assert(task == current_task());
7747 	kern_return_t ret = ipc_space_get_table_size_and_limits(task->itk_space,
7748 	    &current_size, &soft_limit, &hard_limit);
7749 	if (ret == KERN_SUCCESS) {
7750 		SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
7751 	}
7752 }
7753 
7754 #if CONFIG_PROC_RESOURCE_LIMITS
7755 static mach_port_t
task_allocate_fatal_port(void)7756 task_allocate_fatal_port(void)
7757 {
7758 	mach_port_t task_fatal_port = MACH_PORT_NULL;
7759 	task_id_token_t token;
7760 
7761 	kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
7762 	if (kr) {
7763 		return MACH_PORT_NULL;
7764 	}
7765 	task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
7766 	    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
7767 
7768 	task_id_token_set_port(token, task_fatal_port);
7769 
7770 	return task_fatal_port;
7771 }
7772 
7773 static void
task_fatal_port_no_senders(ipc_port_t port,__unused mach_port_mscount_t mscount)7774 task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
7775 {
7776 	task_t task = TASK_NULL;
7777 	kern_return_t kr;
7778 
7779 	task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);
7780 
7781 	assert(token != NULL);
7782 	if (token) {
7783 		kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
7784 		if (task) {
7785 			task_bsdtask_kill(task);
7786 			task_deallocate(task);
7787 		}
7788 		task_id_token_release(token); /* consumes ref given by notification */
7789 	}
7790 }
7791 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7792 
/*
 * Notify (or abort) a process whose Mach port table crossed a limit.
 *
 * current_size is the live port-table size; soft_limit/hard_limit are
 * the per-task limits.  Both limits zero means the system-wide table
 * maximum was hit, which raises EXC_RESOURCE and exits the process.
 * pid 0 (kernel_task) and pid 1 (launchd) are exempt.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	__unused kern_return_t kr;
	__unused resource_notify_flags_t flags = kRNFlagsNone;
	__unused uint32_t limit;
	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}
#endif
	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
	    Num of ports allocated %u; \n", procname, pid, current_size);

	/* Abort the process if it has hit the system-wide limit for ipc port table size */
	if (!hard_limit && !soft_limit) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
		EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);

		exit_with_port_space_exception(current_proc(), code[0], code[1]);

		return;
	}

#if CONFIG_PROC_RESOURCE_LIMITS
	if (hard_limit > 0) {
		/*
		 * Hard limit: hand the daemon a fatal port so it can decide to
		 * kill the task; if the port cannot be built, kill it here.
		 */
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
	}
	if (task_fatal_port) {
		/* Drop our send right; the daemon holds the moved one. */
		ipc_port_release_send(task_fatal_port);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
7856 
/*
 * AST handler fired when a process crosses a file-descriptor limit:
 * forwards to the notification path.  A no-op unless the kernel is
 * built with CONFIG_PROC_RESOURCE_LIMITS.
 */
void
task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
{
#if CONFIG_PROC_RESOURCE_LIMITS
	assert(task == current_task());
	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
7865 
7866 #if CONFIG_PROC_RESOURCE_LIMITS
7867 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task,int current_size,int soft_limit,int hard_limit)7868 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
7869 {
7870 	int pid = 0;
7871 	char *procname = (char *) "unknown";
7872 	kern_return_t kr;
7873 	resource_notify_flags_t flags = kRNFlagsNone;
7874 	int limit;
7875 	mach_port_t task_fatal_port = MACH_PORT_NULL;
7876 
7877 #ifdef MACH_BSD
7878 	pid = proc_selfpid();
7879 	if (get_bsdtask_info(task) != NULL) {
7880 		procname = proc_name_address(get_bsdtask_info(task));
7881 	}
7882 #endif
7883 	/*
7884 	 * Only kernel_task and launchd may be allowed to
7885 	 * have really large ipc space.
7886 	 */
7887 	if (pid == 0 || pid == 1) {
7888 		return;
7889 	}
7890 
7891 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
7892 	    Num of fds allocated %u; \n", procname, pid, current_size);
7893 
7894 	if (hard_limit > 0) {
7895 		flags |= kRNHardLimitFlag;
7896 		limit = hard_limit;
7897 		task_fatal_port = task_allocate_fatal_port();
7898 		if (!task_fatal_port) {
7899 			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
7900 			task_bsdtask_kill(task);
7901 		}
7902 	} else {
7903 		flags |= kRNSoftLimitFlag;
7904 		limit = soft_limit;
7905 	}
7906 
7907 	kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
7908 	if (kr) {
7909 		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
7910 	}
7911 	if (task_fatal_port) {
7912 		ipc_port_release_send(task_fatal_port);
7913 	}
7914 }
7915 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7916 
7917 /* Placeholders for the task set/get voucher interfaces */
7918 kern_return_t
task_get_mach_voucher(task_t task,mach_voucher_selector_t __unused which,ipc_voucher_t * voucher)7919 task_get_mach_voucher(
7920 	task_t                  task,
7921 	mach_voucher_selector_t __unused which,
7922 	ipc_voucher_t           *voucher)
7923 {
7924 	if (TASK_NULL == task) {
7925 		return KERN_INVALID_TASK;
7926 	}
7927 
7928 	*voucher = NULL;
7929 	return KERN_SUCCESS;
7930 }
7931 
7932 kern_return_t
task_set_mach_voucher(task_t task,ipc_voucher_t __unused voucher)7933 task_set_mach_voucher(
7934 	task_t                  task,
7935 	ipc_voucher_t           __unused voucher)
7936 {
7937 	if (TASK_NULL == task) {
7938 		return KERN_INVALID_TASK;
7939 	}
7940 
7941 	return KERN_SUCCESS;
7942 }
7943 
/*
 * Placeholder for the task swap-voucher interface: unsupported, but it
 * must still balance the voucher reference handed in by MIG (see below).
 */
kern_return_t
task_swap_mach_voucher(
	__unused task_t         task,
	__unused ipc_voucher_t  new_voucher,
	ipc_voucher_t          *in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
}
7959 
7960 void
task_set_gpu_denied(task_t task,boolean_t denied)7961 task_set_gpu_denied(task_t task, boolean_t denied)
7962 {
7963 	task_lock(task);
7964 
7965 	if (denied) {
7966 		task->t_flags |= TF_GPU_DENIED;
7967 	} else {
7968 		task->t_flags &= ~TF_GPU_DENIED;
7969 	}
7970 
7971 	task_unlock(task);
7972 }
7973 
7974 boolean_t
task_is_gpu_denied(task_t task)7975 task_is_gpu_denied(task_t task)
7976 {
7977 	/* We don't need the lock to read this flag */
7978 	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
7979 }
7980 
7981 
7982 uint64_t
get_task_memory_region_count(task_t task)7983 get_task_memory_region_count(task_t task)
7984 {
7985 	vm_map_t map;
7986 	map = (task == kernel_task) ? kernel_map: task->map;
7987 	return (uint64_t)get_map_nentries(map);
7988 }
7989 
/*
 * Emit the kdebug events describing one dyld image: UUID, load address,
 * fsid, and fsobjid.  The payload is split across multiple tracepoints
 * because one kdebug event carries at most four payload words; the
 * packing differs between LP64 and 32-bit kernels.
 */
static void
kdebug_trace_dyld_internal(uint32_t base_code,
    struct dyld_kernel_image_info *info)
{
	static_assert(sizeof(info->uuid) >= 16);

#if defined(__LP64__)
	/* Pack the 16-byte UUID as two 64-bit words. */
	uint64_t *uuid = (uint64_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
	    uuid[1], info->load_addr,
	    (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
	    0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
	    (uint64_t)info->fsobjid.fid_objno |
	    ((uint64_t)info->fsobjid.fid_generation << 32),
	    0, 0, 0, 0);
#else /* defined(__LP64__) */
	/* 32-bit kernel: the UUID goes out as four 32-bit words. */
	uint32_t *uuid = (uint32_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
	    uuid[1], uuid[2], uuid[3], 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
	    (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
	    info->fsobjid.fid_objno, 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
	    info->fsobjid.fid_generation, 0, 0, 0, 0);
#endif /* !defined(__LP64__) */
}
8024 
/*
 * Emit kdebug events for an array of dyld image infos supplied as a
 * MIG out-of-line copy.
 *
 * Ownership: on KERN_SUCCESS the copy has been consumed here (discarded
 * when tracing is off, or copied out and deallocated).  On the error
 * paths the copy is NOT discarded — NOTE(review): this appears to rely
 * on the MIG-generated caller destroying the request (and its OOL
 * memory) for non-success returns; confirm against the generated stub
 * before changing these paths.
 */
static kern_return_t
kdebug_trace_dyld(task_t task, uint32_t base_code,
    vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
{
	kern_return_t kr;
	dyld_kernel_image_info_array_t infos;
	vm_map_offset_t map_data;
	vm_offset_t data;

	if (!infos_copy) {
		return KERN_INVALID_ADDRESS;
	}

	/* Tracing disabled: nothing to record, just consume the copy. */
	if (!kdebug_enable ||
	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
		vm_map_copy_discard(infos_copy);
		return KERN_SUCCESS;
	}

	/* Only the calling task may trace its own images. */
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	/* Map the caller-supplied array into the kernel ipc map. */
	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);

	for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
		kdebug_trace_dyld_internal(base_code, &(infos[i]));
	}

	/* Tear down the mapping established by vm_map_copyout(). */
	data = CAST_DOWN(vm_offset_t, map_data);
	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
	return KERN_SUCCESS;
}
8063 
/*
 * MIG entry: record dyld image-load events in kdebug.  infos_copy is a
 * moved-in out-of-line array; kdebug_trace_dyld handles its ownership.
 */
kern_return_t
task_register_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8072 
/*
 * MIG entry: record dyld image-unload events in kdebug.  infos_copy is
 * a moved-in out-of-line array; kdebug_trace_dyld handles its
 * ownership.
 */
kern_return_t
task_unregister_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8081 
/*
 * MIG entry: retrieving dyld image infos from the kernel is not
 * implemented.
 */
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
	return KERN_NOT_SUPPORTED;
}
8089 
/*
 * MIG entry: emit a kdebug event describing the calling task's dyld
 * shared-cache image.  Only the task itself may register
 * (task must equal current_task()); no_cache/private_cache are
 * accepted but unused.
 */
kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,
    dyld_kernel_image_info_t cache_img,
    __unused boolean_t no_cache,
    __unused boolean_t private_cache)
{
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
	return KERN_SUCCESS;
}
8103 
/*
 * MIG entry: setting dyld state through the kernel is not implemented.
 */
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
	return KERN_NOT_SUPPORTED;
}
8110 
/*
 * MIG entry: querying dyld process state through the kernel is not
 * implemented.
 */
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
	return KERN_NOT_SUPPORTED;
}
8117 
/*
 * Return performance-counter style information about a task.  Only
 * TASK_INSPECT_BASIC_COUNTS (lifetime instruction and cycle totals from
 * the recount subsystem) is implemented, and only on kernels built with
 * CONFIG_PERVASIVE_CPI; otherwise KERN_NOT_SUPPORTED.
 *
 * size_in_out: in — capacity of info_out in natural_t words;
 * out — number of words written on success.
 */
kern_return_t
task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
    task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
{
#if CONFIG_PERVASIVE_CPI
	task_t task = (task_t)task_insp;
	kern_return_t kr = KERN_SUCCESS;
	mach_msg_type_number_t size;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	size = *size_in_out;

	switch (flavor) {
	case TASK_INSPECT_BASIC_COUNTS: {
		struct task_inspect_basic_counts *bc =
		    (struct task_inspect_basic_counts *)info_out;
		struct recount_usage stats = { 0 };
		/* Reject undersized output buffers before writing anything. */
		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Sum the task's lifetime usage across recount tracks. */
		recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
		bc->instructions = stats.ru_instructions;
		bc->cycles = stats.ru_cycles;
		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
		break;
	}
	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	if (kr == KERN_SUCCESS) {
		*size_in_out = size;
	}
	return kr;
#else /* CONFIG_PERVASIVE_CPI */
#pragma unused(task_insp, flavor, info_out, size_in_out)
	return KERN_NOT_SUPPORTED;
#endif /* !CONFIG_PERVASIVE_CPI */
}
8163 
8164 #if CONFIG_SECLUDED_MEMORY
8165 int num_tasks_can_use_secluded_mem = 0;
8166 
8167 void
task_set_can_use_secluded_mem(task_t task,boolean_t can_use_secluded_mem)8168 task_set_can_use_secluded_mem(
8169 	task_t          task,
8170 	boolean_t       can_use_secluded_mem)
8171 {
8172 	if (!task->task_could_use_secluded_mem) {
8173 		return;
8174 	}
8175 	task_lock(task);
8176 	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
8177 	task_unlock(task);
8178 }
8179 
/*
 * Grant or revoke the task's permission to allocate from the secluded
 * pool.  Caller holds the task lock.  Maintains the global
 * num_tasks_can_use_secluded_mem count, which gates "could also use"
 * tasks in task_can_use_secluded_mem().  Granting additionally requires
 * the secluded_for_apps boot-arg; both transitions are no-ops when the
 * flag already has the requested value (keeping the count balanced).
 */
void
task_set_can_use_secluded_mem_locked(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	assert(task->task_could_use_secluded_mem);
	if (can_use_secluded_mem &&
	    secluded_for_apps &&         /* global boot-arg */
	    !task->task_can_use_secluded_mem) {
		assert(num_tasks_can_use_secluded_mem >= 0);
		OSAddAtomic(+1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = TRUE;
	} else if (!can_use_secluded_mem &&
	    task->task_can_use_secluded_mem) {
		assert(num_tasks_can_use_secluded_mem > 0);
		OSAddAtomic(-1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = FALSE;
	}
}
8201 
8202 void
task_set_could_use_secluded_mem(task_t task,boolean_t could_use_secluded_mem)8203 task_set_could_use_secluded_mem(
8204 	task_t          task,
8205 	boolean_t       could_use_secluded_mem)
8206 {
8207 	task->task_could_use_secluded_mem = !!could_use_secluded_mem;
8208 }
8209 
/* Mark whether this task may piggyback on secluded memory whenever any
 * other task is actively using it (see task_can_use_secluded_mem). */
void
task_set_could_also_use_secluded_mem(
	task_t          task,
	boolean_t       could_also_use_secluded_mem)
{
	task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
}
8217 
8218 boolean_t
task_can_use_secluded_mem(task_t task,boolean_t is_alloc)8219 task_can_use_secluded_mem(
8220 	task_t          task,
8221 	boolean_t       is_alloc)
8222 {
8223 	if (task->task_can_use_secluded_mem) {
8224 		assert(task->task_could_use_secluded_mem);
8225 		assert(num_tasks_can_use_secluded_mem > 0);
8226 		return TRUE;
8227 	}
8228 	if (task->task_could_also_use_secluded_mem &&
8229 	    num_tasks_can_use_secluded_mem > 0) {
8230 		assert(num_tasks_can_use_secluded_mem > 0);
8231 		return TRUE;
8232 	}
8233 
8234 	/*
8235 	 * If a single task is using more than some large amount of
8236 	 * memory (i.e. secluded_shutoff_trigger) and is approaching
8237 	 * its task limit, allow it to dip into secluded and begin
8238 	 * suppression of rebuilding secluded memory until that task exits.
8239 	 */
8240 	if (is_alloc && secluded_shutoff_trigger != 0) {
8241 		uint64_t phys_used = get_task_phys_footprint(task);
8242 		uint64_t limit = get_task_phys_footprint_limit(task);
8243 		if (phys_used > secluded_shutoff_trigger &&
8244 		    limit > secluded_shutoff_trigger &&
8245 		    phys_used > limit - secluded_shutoff_headroom) {
8246 			start_secluded_suppression(task);
8247 			return TRUE;
8248 		}
8249 	}
8250 
8251 	return FALSE;
8252 }
8253 
/* Accessor: is this task eligible to have secluded memory use enabled? */
boolean_t
task_could_use_secluded_mem(
	task_t  task)
{
	return task->task_could_use_secluded_mem;
}
8260 
/* Accessor: may this task piggyback on other tasks' secluded memory use? */
boolean_t
task_could_also_use_secluded_mem(
	task_t  task)
{
	return task->task_could_also_use_secluded_mem;
}
8267 #endif /* CONFIG_SECLUDED_MEMORY */
8268 
/* Return the head of the task's IOKit user-client queue (no locking here;
 * synchronization is presumably the caller's responsibility — verify). */
queue_head_t *
task_io_user_clients(task_t task)
{
	return &task->io_user_clients;
}
8274 
/* Set whether this task wants to be notified when apps get suspended. */
void
task_set_message_app_suspended(task_t task, boolean_t enable)
{
	task->message_app_suspended = enable;
}
8280 
/* Carry selected fields across exec: currently only the vtimers bitmask
 * survives from the old task into the new one. */
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
	dst_task->vtimers = src_task->vtimers;
}
8286 
#if DEVELOPMENT || DEBUG
/* Debug knob: when non-zero, all tasks report footprint-style VM regions. */
int vm_region_footprint = 0;
#endif /* DEVELOPMENT || DEBUG */
8290 
/*
 * Should vm_region-style queries for the current task report footprint
 * accounting? Honors the system-wide debug override before the per-task bit.
 */
boolean_t
task_self_region_footprint(void)
{
#if DEVELOPMENT || DEBUG
	if (vm_region_footprint) {
		/* system-wide override */
		return TRUE;
	}
#endif /* DEVELOPMENT || DEBUG */
	return current_task()->task_region_footprint;
}
8302 
8303 void
task_self_region_footprint_set(boolean_t newval)8304 task_self_region_footprint_set(
8305 	boolean_t newval)
8306 {
8307 	task_t  curtask;
8308 
8309 	curtask = current_task();
8310 	task_lock(curtask);
8311 	if (newval) {
8312 		curtask->task_region_footprint = TRUE;
8313 	} else {
8314 		curtask->task_region_footprint = FALSE;
8315 	}
8316 	task_unlock(curtask);
8317 }
8318 
8319 void
task_set_darkwake_mode(task_t task,boolean_t set_mode)8320 task_set_darkwake_mode(task_t task, boolean_t set_mode)
8321 {
8322 	assert(task);
8323 
8324 	task_lock(task);
8325 
8326 	if (set_mode) {
8327 		task->t_flags |= TF_DARKWAKE_MODE;
8328 	} else {
8329 		task->t_flags &= ~(TF_DARKWAKE_MODE);
8330 	}
8331 
8332 	task_unlock(task);
8333 }
8334 
8335 boolean_t
task_get_darkwake_mode(task_t task)8336 task_get_darkwake_mode(task_t task)
8337 {
8338 	assert(task);
8339 	return (task->t_flags & TF_DARKWAKE_MODE) != 0;
8340 }
8341 
/*
 * Set default behavior for task's control port and EXC_GUARD variants that have
 * settable behavior.
 *
 * Platform binaries typically have one behavior, third parties another -
 * but there are special exceptions we may need to account for.
 */
void
task_set_exc_guard_ctrl_port_default(
	task_t task,
	thread_t main_thread,
	const char *name,
	unsigned int namelen,
	boolean_t is_simulated,
	uint32_t platform,
	uint32_t sdk)
{
	task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;

	if (task_get_platform_binary(task)) {
		/* set exc guard default behavior for first-party code */
		task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);

		if (1 == task_pid(task)) {
			/* special flags for inittask - deliver every instance as corpse */
			task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
		} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
			/* honor by-name default setting overrides */

			int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);

			for (int i = 0; i < count; i++) {
				const struct task_exc_guard_named_default *named_default =
				    &task_exc_guard_named_defaults[i];
				/* exact-length name match: prefix must match AND lengths agree */
				if (strncmp(named_default->name, name, namelen) == 0 &&
				    strlen(named_default->name) == namelen) {
					task->task_exc_guard = named_default->behavior;
					break;
				}
			}
		}

		/* set control port options for 1p code, inherited from parent task by default */
		opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
	} else {
		/* set exc guard default behavior for third-party code */
		task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
		/* set control port options for 3p code, inherited from parent task by default */
		opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
	}

	if (is_simulated) {
		/* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
		/* sdk encodes major version in the high 16 bits: 0xf0000 == 15.0, 0x80000 == 8.0 */
		if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
			task->task_exc_guard = TASK_EXC_GUARD_NONE;
		}
		/* Disable protection for control ports for simulated binaries */
		opts = TASK_CONTROL_PORT_OPTIONS_NONE;
	}


	task_set_control_port_options(task, opts);

	task_set_immovable_pinned(task);
	main_thread_set_immovable_pinned(main_thread);
}
8410 
/*
 * Fetch the task's current EXC_GUARD delivery behavior.
 * Returns KERN_INVALID_TASK for TASK_NULL; behaviorp must be valid.
 */
kern_return_t
task_get_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t *behaviorp)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	*behaviorp = task->task_exc_guard;
	return KERN_SUCCESS;
}
8422 
/*
 * Change the task's EXC_GUARD delivery behavior.
 *
 * The requested behavior is validated against TASK_EXC_GUARD_ALL and then
 * clamped to what this configuration allows. On RELEASE kernels the update
 * is performed with an atomic read-modify-write loop that refuses to clear
 * bits in the no-unset mask or set bits in the no-set mask (i.e. only
 * "upgrades" are permitted); DEBUG/DEVELOPMENT kernels store it directly.
 */
kern_return_t
task_set_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t new_behavior)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
		return KERN_INVALID_VALUE;
	}

	/* limit setting to that allowed for this config */
	new_behavior = new_behavior & task_exc_guard_config_mask;

#if !defined (DEBUG) && !defined (DEVELOPMENT)
	/* On release kernels, only allow _upgrading_ exc guard behavior */
	task_exc_guard_behavior_t cur_behavior;

	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
		/* would clear a bit that may not be unset? */
		if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* would set a bit that may not be set? */
		if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* no restrictions on CORPSE bit */
	});
#else
	task->task_exc_guard = new_behavior;
#endif
	return KERN_SUCCESS;
}
8458 
/*
 * Enable/disable corpse forking for a task (DEVELOPMENT/DEBUG kernels only;
 * returns KERN_NOT_SUPPORTED elsewhere). Setting
 * TASK_CORPSE_FORKING_DISABLED_MEM_DIAG raises TF_NO_CORPSE_FORKING.
 */
kern_return_t
task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
{
#if DEVELOPMENT || DEBUG
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}

	task_lock(task);
	if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
		task->t_flags |= TF_NO_CORPSE_FORKING;
	} else {
		task->t_flags &= ~TF_NO_CORPSE_FORKING;
	}
	task_unlock(task);

	return KERN_SUCCESS;
#else
	(void)task;
	(void)behavior;
	return KERN_NOT_SUPPORTED;
#endif
}
8482 
8483 boolean_t
task_corpse_forking_disabled(task_t task)8484 task_corpse_forking_disabled(task_t task)
8485 {
8486 	boolean_t disabled = FALSE;
8487 
8488 	task_lock(task);
8489 	disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
8490 	task_unlock(task);
8491 
8492 	return disabled;
8493 }
8494 
8495 #if __arm64__
8496 extern int legacy_footprint_entitlement_mode;
8497 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
8498 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
8499 
8500 
/* Mark the task as using legacy (pre-entitlement-change) footprint
 * accounting; one-way set under the task lock. */
void
task_set_legacy_footprint(
	task_t task)
{
	task_lock(task);
	task->task_legacy_footprint = TRUE;
	task_unlock(task);
}
8509 
/*
 * Grant the task its extra footprint limit (legacy-footprint entitlement).
 * Idempotent: uses an unlocked fast check plus a locked re-check so the
 * memorystatus action runs at most once per task.
 */
void
task_set_extra_footprint_limit(
	task_t task)
{
	if (task->task_extra_footprint_limit) {
		return;
	}
	task_lock(task);
	if (task->task_extra_footprint_limit) {
		/* lost the race: someone else already applied it */
		task_unlock(task);
		return;
	}
	task->task_extra_footprint_limit = TRUE;
	task_unlock(task);
	/* act outside the task lock */
	memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
}
8526 
/*
 * Grant the task the iOS 13 extended footprint limit. Same idempotent
 * check/lock/re-check pattern as task_set_extra_footprint_limit.
 */
void
task_set_ios13extended_footprint_limit(
	task_t task)
{
	if (task->task_ios13extended_footprint_limit) {
		return;
	}
	task_lock(task);
	if (task->task_ios13extended_footprint_limit) {
		/* lost the race: someone else already applied it */
		task_unlock(task);
		return;
	}
	task->task_ios13extended_footprint_limit = TRUE;
	task_unlock(task);
	/* act outside the task lock */
	memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
}
8543 #endif /* __arm64__ */
8544 
8545 static inline ledger_amount_t
task_ledger_get_balance(ledger_t ledger,int ledger_idx)8546 task_ledger_get_balance(
8547 	ledger_t        ledger,
8548 	int             ledger_idx)
8549 {
8550 	ledger_amount_t amount;
8551 	amount = 0;
8552 	ledger_get_balance(ledger, ledger_idx, &amount);
8553 	return amount;
8554 }
8555 
8556 /*
8557  * Gather the amount of memory counted in a task's footprint due to
8558  * being in a specific set of ledgers.
8559  */
8560 void
task_ledgers_footprint(ledger_t ledger,ledger_amount_t * ledger_resident,ledger_amount_t * ledger_compressed)8561 task_ledgers_footprint(
8562 	ledger_t        ledger,
8563 	ledger_amount_t *ledger_resident,
8564 	ledger_amount_t *ledger_compressed)
8565 {
8566 	*ledger_resident = 0;
8567 	*ledger_compressed = 0;
8568 
8569 	/* purgeable non-volatile memory */
8570 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
8571 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
8572 
8573 	/* "default" tagged memory */
8574 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
8575 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
8576 
8577 	/* "network" currently never counts in the footprint... */
8578 
8579 	/* "media" tagged memory */
8580 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
8581 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
8582 
8583 	/* "graphics" tagged memory */
8584 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
8585 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
8586 
8587 	/* "neural" tagged memory */
8588 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
8589 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
8590 }
8591 
8592 #if CONFIG_MEMORYSTATUS
/*
 * Credit any outstanding task dirty time to the ledger.
 * memstat_dirty_start is pushed forward to prevent any possibility of double
 * counting, making it safe to call this as often as necessary to ensure that
 * anyone reading the ledger gets up-to-date information.
 */
void
task_ledger_settle_dirty_time(task_t t)
{
	task_lock(t);

	uint64_t start = t->memstat_dirty_start;
	/* zero start means the task is not currently tracked as dirty */
	if (start) {
		uint64_t now = mach_absolute_time();

		uint64_t duration;
		absolutetime_to_nanoseconds(now - start, &duration);

		ledger_t ledger = get_task_ledger(t);
		ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);

		/* advance the window start so this interval is never re-credited */
		t->memstat_dirty_start = now;
	}

	task_unlock(t);
}
8619 #endif /* CONFIG_MEMORYSTATUS */
8620 
/* Set whether this task may transfer ownership of VM objects to other
 * tasks; value is canonicalized to 0/1 under the task lock. */
void
task_set_memory_ownership_transfer(
	task_t    task,
	boolean_t value)
{
	task_lock(task);
	task->task_can_transfer_memory_ownership = !!value;
	task_unlock(task);
}
8630 
8631 #if DEVELOPMENT || DEBUG
8632 
/* Debug-only: exclude this task's memory from footprint accounting. */
void
task_set_no_footprint_for_debug(task_t task, boolean_t value)
{
	task_lock(task);
	task->task_no_footprint_for_debug = !!value;
	task_unlock(task);
}
8640 
/* Debug-only accessor for the no-footprint bit (read without the lock). */
int
task_get_no_footprint_for_debug(task_t task)
{
	return task->task_no_footprint_for_debug;
}
8646 
8647 #endif /* DEVELOPMENT || DEBUG */
8648 
/*
 * Copy per-object query data for the task's owned VM objects into `query`.
 *
 * If query is NULL, only the count of owned objects is reported via *num.
 * Otherwise up to len bytes of vm_object_query_data_t entries are filled
 * and *num receives the number actually written. Holds the task's objq
 * lock for the duration of the walk.
 */
void
task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
{
	vm_object_t find_vmo;
	size_t size = 0;

	task_objq_lock(task);
	if (query != NULL) {
		queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
		{
			vm_object_query_t p = &query[size++];

			/* make sure to not overrun: size was already bumped, so
			 * undo the increment before bailing out */
			if (size * sizeof(vm_object_query_data_t) > len) {
				--size;
				break;
			}

			bzero(p, sizeof(*p));
			p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
			/* virtual size only meaningful for internal objects */
			p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
			p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
			p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
			p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
			p->vo_no_footprint = find_vmo->vo_no_footprint;
			p->vo_ledger_tag = find_vmo->vo_ledger_tag;
			p->purgable = find_vmo->purgable;

			if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
				p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
			} else {
				p->compressed_size = 0;
			}
		}
	} else {
		/* count-only query */
		size = (size_t)task->task_owned_objects;
	}
	task_objq_unlock(task);

	*num = size;
}
8690 
/*
 * Two-phase query of a task's owned VM objects.
 *
 * buffer_size == 0: size probe — *entries gets the object count and
 * *output_size the byte size a caller must allocate (header + entries).
 * Otherwise the vmobject_list_output_t buffer is populated and both
 * out-params reflect what was actually written.
 */
void
task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
{
	assert(output_size);
	assert(entries);

	/* copy the vmobjects and vmobject data out of the task */
	if (buffer_size == 0) {
		task_copy_vmobjects(task, NULL, 0, entries);
		*output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
	} else {
		assert(buffer);
		/* reserve room for the list header, then fill the data array */
		task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
		buffer->entries = (uint64_t)*entries;
		*output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
	}
}
8708 
8709 void
task_store_owned_vmobject_info(task_t to_task,task_t from_task)8710 task_store_owned_vmobject_info(task_t to_task, task_t from_task)
8711 {
8712 	size_t buffer_size;
8713 	vmobject_list_output_t buffer;
8714 	size_t output_size;
8715 	size_t entries;
8716 
8717 	assert(to_task != from_task);
8718 
8719 	/* get the size, allocate a bufferr, and populate */
8720 	entries = 0;
8721 	output_size = 0;
8722 	task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);
8723 
8724 	if (output_size) {
8725 		buffer_size = output_size;
8726 		buffer = kalloc_data(buffer_size, Z_WAITOK);
8727 
8728 		if (buffer) {
8729 			entries = 0;
8730 			output_size = 0;
8731 
8732 			task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);
8733 
8734 			if (entries) {
8735 				to_task->corpse_vmobject_list = buffer;
8736 				to_task->corpse_vmobject_list_size = buffer_size;
8737 			}
8738 		}
8739 	}
8740 }
8741 
/* Set or clear the read-only TFRO_FILTER_MSG flag (message filtering)
 * via the atomic proc_ro flag updaters. */
void
task_set_filter_msg_flag(
	task_t task,
	boolean_t flag)
{
	assert(task != TASK_NULL);

	if (flag) {
		task_ro_flags_set(task, TFRO_FILTER_MSG);
	} else {
		task_ro_flags_clear(task, TFRO_FILTER_MSG);
	}
}
8755 
8756 boolean_t
task_get_filter_msg_flag(task_t task)8757 task_get_filter_msg_flag(
8758 	task_t task)
8759 {
8760 	if (!task) {
8761 		return false;
8762 	}
8763 
8764 	return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
8765 }
/* Does the task's address space count as "exotic" (per the VM map)? */
bool
task_is_exotic(
	task_t task)
{
	if (task == TASK_NULL) {
		return false;
	}
	return vm_map_is_exotic(get_task_map(task));
}
8775 
/* Does the task's address space count as "alien" (per the VM map)? */
bool
task_is_alien(
	task_t task)
{
	if (task == TASK_NULL) {
		return false;
	}
	return vm_map_is_alien(get_task_map(task));
}
8785 
8786 
8787 
8788 #if CONFIG_MACF
/* Set the filter mask for Mach traps (MACF policy hook). */
void
mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_trap_filter_mask(task, maskptr);
}
8797 
/* Set the filter mask for kobject msgs (MACF policy hook). */
void
mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_kobj_filter_mask(task, maskptr);
}
8806 
/* Hook for mach trap/sc filter evaluation policy; NULL until a policy
 * registers via mac_task_register_filter_callbacks(). */
mac_task_mach_filter_cbfunc_t mac_task_mach_trap_evaluate = NULL;

/* Hook for kobj message filter evaluation policy; NULL until registered. */
mac_task_kobj_filter_cbfunc_t mac_task_kobj_msg_evaluate = NULL;
8812 
8813 /* Set the callback hooks for the filtering policy. */
8814 int
mac_task_register_filter_callbacks(const mac_task_mach_filter_cbfunc_t mach_cbfunc,const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)8815 mac_task_register_filter_callbacks(
8816 	const mac_task_mach_filter_cbfunc_t mach_cbfunc,
8817 	const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
8818 {
8819 	if (mach_cbfunc != NULL) {
8820 		if (mac_task_mach_trap_evaluate != NULL) {
8821 			return KERN_FAILURE;
8822 		}
8823 		mac_task_mach_trap_evaluate = mach_cbfunc;
8824 	}
8825 	if (kobj_cbfunc != NULL) {
8826 		if (mac_task_kobj_msg_evaluate != NULL) {
8827 			return KERN_FAILURE;
8828 		}
8829 		mac_task_kobj_msg_evaluate = kobj_cbfunc;
8830 	}
8831 
8832 	return KERN_SUCCESS;
8833 }
8834 #endif /* CONFIG_MACF */
8835 
8836 #if CONFIG_ROSETTA
/* Is this task running under Rosetta translation? (false for TASK_NULL) */
bool
task_is_translated(task_t task)
{
	extern boolean_t proc_is_translated(struct proc* p);
	return task && proc_is_translated(get_bsdtask_info(task));
}
8843 #endif
8844 
8845 
8846 #if __has_feature(ptrauth_calls)
8847 /* All pac violations will be delivered as fatal exceptions irrespective of
8848  * the enable_pac_exception boot-arg value.
8849  */
8850 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
8851 /*
8852  * When enable_pac_exception boot-arg is set to true, processes
8853  * can choose to get non-fatal pac exception delivery by setting
8854  * this entitlement.
8855  */
8856 #define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
8857 
8858 void
task_set_pac_exception_fatal_flag(task_t task)8859 task_set_pac_exception_fatal_flag(
8860 	task_t task)
8861 {
8862 	assert(task != TASK_NULL);
8863 	bool pac_entitlement = false;
8864 	uint32_t set_flags = 0;
8865 
8866 	if (enable_pac_exception && IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
8867 		return;
8868 	}
8869 
8870 	if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT)) {
8871 		pac_entitlement = true;
8872 	}
8873 
8874 	if (pac_entitlement) {
8875 		set_flags |= TFRO_PAC_ENFORCE_USER_STATE;
8876 	}
8877 	if (pac_entitlement || (enable_pac_exception && task_get_platform_binary(task))) {
8878 		set_flags |= TFRO_PAC_EXC_FATAL;
8879 	}
8880 	if (set_flags != 0) {
8881 		task_ro_flags_set(task, set_flags);
8882 	}
8883 }
8884 
/* Should PAC violations be delivered as fatal exceptions for this task? */
bool
task_is_pac_exception_fatal(
	task_t task)
{
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
}
8892 #endif /* __has_feature(ptrauth_calls) */
8893 
/* Must user thread state handed to this task carry a valid PAC signature? */
bool
task_needs_user_signed_thread_state(
	task_t task)
{
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
}
8901 
/*
 * Enable TECS (CPU vulnerability mitigation, CPUVN_CI) for a task —
 * defaults to the current task when TASK_NULL is passed. No-op when the
 * machine does not require the mitigation. Sets TF_TECS and applies the
 * mitigation to every existing thread under the task lock.
 */
void
task_set_tecs(task_t task)
{
	if (task == TASK_NULL) {
		task = current_task();
	}

	if (!machine_csv(CPUVN_CI)) {
		return;
	}

	/* we are about to take the lock; caller must not already hold it */
	LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);

	task_lock(task);

	task->t_flags |= TF_TECS;

	thread_t thread;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		machine_tecs(thread);
	}
	task_unlock(task);
}
8925 
/*
 * Test-only (DEVELOPMENT/DEBUG): block the caller on a synchronous kernel
 * upcall to the given send port, then consume the send right.
 * Only callable on the current task with a valid port.
 */
kern_return_t
task_test_sync_upcall(
	task_t     task,
	ipc_port_t send_port)
{
#if DEVELOPMENT || DEBUG
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Block on sync kernel upcall on the given send port */
	mach_test_sync_upcall(send_port);

	/* consume the caller-donated send right */
	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	return KERN_NOT_SUPPORTED;
#endif
}
8947 
/*
 * Test-only (DEVELOPMENT/DEBUG): exercise QoS/IO-tier propagation through
 * an asynchronous kernel upcall. Attaches the requested thread attributes
 * to the port, performs the upcall with send-importance enabled, then
 * consumes the send right. qos/iotier are range-checked.
 */
kern_return_t
task_test_async_upcall_propagation(
	task_t      task,
	ipc_port_t  send_port,
	int         qos,
	int         iotier)
{
#if DEVELOPMENT || DEBUG
	kern_return_t kr;

	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (qos < THREAD_QOS_DEFAULT || qos > THREAD_QOS_USER_INTERACTIVE ||
	    iotier < THROTTLE_LEVEL_START || iotier > THROTTLE_LEVEL_END) {
		return KERN_INVALID_ARGUMENT;
	}

	struct thread_attr_for_ipc_propagation attr = {
		.tafip_iotier = iotier,
		.tafip_qos = qos
	};

	/* Apply propagate attr to port */
	kr = ipc_port_propagate_thread_attr(send_port, attr);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	thread_enable_send_importance(current_thread(), TRUE);

	/* Perform an async kernel upcall on the given send port */
	mach_test_async_upcall(send_port);
	thread_enable_send_importance(current_thread(), FALSE);

	/* consume the caller-donated send right */
	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	(void)qos;
	(void)iotier;
	return KERN_NOT_SUPPORTED;
#endif
}
8994 
8995 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Allocate the current task's resource-limit fatal port and copy a send
 * right into the caller's IPC space. Returns the resulting port name, or
 * 0 if the port could not be allocated.
 */
mach_port_name_t
current_task_get_fatal_port_name(void)
{
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_port_name_t port_name = 0;

	task_fatal_port = task_allocate_fatal_port();

	if (task_fatal_port) {
		/* copyout consumes the send right on success */
		ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
		    IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
	}

	return port_name;
}
9011 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
9012 
9013 #if defined(__x86_64__)
9014 bool
curtask_get_insn_copy_optout(void)9015 curtask_get_insn_copy_optout(void)
9016 {
9017 	bool optout;
9018 	task_t cur_task = current_task();
9019 
9020 	task_lock(cur_task);
9021 	optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9022 	task_unlock(cur_task);
9023 
9024 	return optout;
9025 }
9026 
/*
 * Opt the current task out of instruction-copy diagnostics: set
 * TF_INSN_COPY_OPTOUT and apply the opt-out to every existing thread,
 * all under the task lock.
 */
void
curtask_set_insn_copy_optout(void)
{
	task_t cur_task = current_task();

	task_lock(cur_task);

	cur_task->t_flags |= TF_INSN_COPY_OPTOUT;

	thread_t thread;
	queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
		machine_thread_set_insn_copy_optout(thread);
	}
	task_unlock(cur_task);
}
9042 #endif /* defined(__x86_64__) */
9043 
/* Return the corpse's saved owned-VM-object snapshot (pointer + byte size);
 * ownership stays with the task. */
void
task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
{
	assert(task);
	assert(list_size);

	*list = task->corpse_vmobject_list;
	*list_size = (size_t)task->corpse_vmobject_list_size;
}
9053 
/* Panic helper: the proc_ro structure's task back-reference does not point
 * at the task that owns it (memory corruption or a forged pointer). */
__abortlike
static void
panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
{
	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
	    "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
}
9061 
/*
 * Return the task's read-only proc_ro structure after integrity checks:
 * the pointer must live in the PROC_RO read-only zone and its task
 * back-reference must point back at this task; otherwise panic.
 */
proc_ro_t
task_get_ro(task_t t)
{
	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;

	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(proc_ro_task(ro) != t)) {
		panic_proc_ro_task_backref_mismatch(t, ro);
	}

	return ro;
}
9074 
/* Read the task's read-only flag word (TFRO_*) from its proc_ro. */
uint32_t
task_ro_flags_get(task_t task)
{
	return task_get_ro(task)->t_flags_ro;
}
9080 
/* Atomically OR flags into the task's read-only flag word (the proc_ro
 * zone is write-protected; updates go through zalloc_ro). */
void
task_ro_flags_set(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_OR_32, flags);
}
9087 
/* Atomically clear flags in the task's read-only flag word (AND-NOT). */
void
task_ro_flags_clear(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
}
9094 
/* Read the task's control-port hardening options from its proc_ro. */
task_control_port_options_t
task_get_control_port_options(task_t task)
{
	return task_get_ro(task)->task_control_port_options;
}
9100 
/* Store the task's control-port hardening options into write-protected
 * proc_ro storage (non-atomic whole-field update). */
void
task_set_control_port_options(task_t task, task_control_port_options_t opts)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_control_port_options, &opts);
}
9107 
/*!
 * @function kdp_task_is_locked
 *
 * @abstract
 * Checks if task is locked.
 *
 * @discussion
 * NOT SAFE: To be used only by kernel debugger (inspects the mutex's
 * internal state without synchronization).
 *
 * @param task task to check
 *
 * @returns TRUE if the task is locked.
 */
boolean_t
kdp_task_is_locked(task_t task)
{
	return kdp_lck_mtx_lock_spin_is_acquired(&task->lock);
}
9126