xref: /xnu-8796.101.5/osfmk/kern/task.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_FREE_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/task.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59  *		David Black
60  *
61  *	Task management primitives implementation.
62  */
63 /*
64  * Copyright (c) 1993 The University of Utah and
65  * the Computer Systems Laboratory (CSL).  All rights reserved.
66  *
67  * Permission to use, copy, modify and distribute this software and its
68  * documentation is hereby granted, provided that both the copyright
69  * notice and this permission notice appear in all copies of the
70  * software, derivative works or modified versions, and any portions
71  * thereof, and that both notices appear in supporting documentation.
72  *
73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76  *
77  * CSL requests users of this software to return to [email protected] any
78  * improvements that they make and grant CSL redistribution rights.
79  *
80  */
81 /*
82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83  * support for mandatory and extensible security protections.  This notice
84  * is included in support of clause 2.2 (b) of the Apple Public License,
85  * Version 2.0.
86  * Copyright (c) 2005 SPARTA, Inc.
87  */
88 
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100 #include <mach/mach_test_upcall.h>
101 
102 #include <ipc/ipc_importance.h>
103 #include <ipc/ipc_types.h>
104 #include <ipc/ipc_space.h>
105 #include <ipc/ipc_entry.h>
106 #include <ipc/ipc_hash.h>
107 #include <ipc/ipc_init.h>
108 
109 #include <kern/kern_types.h>
110 #include <kern/mach_param.h>
111 #include <kern/misc_protos.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/coalition.h>
115 #include <kern/zalloc.h>
116 #include <kern/kalloc.h>
117 #include <kern/kern_cdata.h>
118 #include <kern/processor.h>
119 #include <kern/recount.h>
120 #include <kern/sched_prim.h>    /* for thread_wakeup */
121 #include <kern/ipc_tt.h>
122 #include <kern/host.h>
123 #include <kern/clock.h>
124 #include <kern/timer.h>
125 #include <kern/assert.h>
126 #include <kern/affinity.h>
127 #include <kern/exc_resource.h>
128 #include <kern/machine.h>
129 #include <kern/policy_internal.h>
130 #include <kern/restartable.h>
131 #include <kern/ipc_kobject.h>
132 
133 #include <corpses/task_corpse.h>
134 #if CONFIG_TELEMETRY
135 #include <kern/telemetry.h>
136 #endif
137 
138 #if CONFIG_PERVASIVE_CPI
139 #include <kern/monotonic.h>
140 #include <machine/monotonic.h>
141 #endif /* CONFIG_PERVASIVE_CPI */
142 
143 #include <os/log.h>
144 
145 #include <vm/pmap.h>
146 #include <vm/vm_map.h>
147 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
148 #include <vm/vm_pageout.h>
149 #include <vm/vm_protos.h>
150 #include <vm/vm_purgeable_internal.h>
151 #include <vm/vm_compressor_pager.h>
152 #include <vm/vm_reclaim_internal.h>
153 
154 #include <sys/proc_ro.h>
155 #include <sys/resource.h>
156 #include <sys/signalvar.h> /* for coredump */
157 #include <sys/bsdtask_info.h>
158 /*
159  * Exported interfaces
160  */
161 
162 #include <mach/task_server.h>
163 #include <mach/mach_host_server.h>
164 #include <mach/mach_port_server.h>
165 
166 #include <vm/vm_shared_region.h>
167 
168 #include <libkern/OSDebug.h>
169 #include <libkern/OSAtomic.h>
170 #include <libkern/section_keywords.h>
171 
172 #include <mach-o/loader.h>
173 #include <kdp/kdp_dyld.h>
174 
175 #include <kern/sfi.h>           /* picks up ledger.h */
176 
177 #if CONFIG_MACF
178 #include <security/mac_mach_internal.h>
179 #endif
180 
181 #include <IOKit/IOBSD.h>
182 #include <kdp/processor_core.h>
183 
184 #if KPERF
185 extern int kpc_force_all_ctrs(task_t, int);
186 #endif
187 
188 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
189 
190 int64_t         next_taskuniqueid = 0;
191 const size_t task_alignment = _Alignof(struct task);
192 extern const size_t proc_alignment;
193 extern size_t proc_struct_size;
194 extern size_t proc_and_task_size;
195 size_t task_struct_size;
196 
197 extern uint32_t ipc_control_port_options;
198 
199 extern int large_corpse_count;
200 
201 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
202 extern void task_disown_frozen_csegs(task_t owner_task);
203 
204 static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
205 static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
206 static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
207 static inline void task_zone_init(void);
208 
209 IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
210 IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
211     .iko_op_no_senders = task_port_no_senders);
212 IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
213     .iko_op_no_senders = task_port_with_flavor_no_senders);
214 IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
215     .iko_op_no_senders = task_port_with_flavor_no_senders);
216 IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
217     .iko_op_no_senders = task_suspension_no_senders);
218 
219 #if CONFIG_PROC_RESOURCE_LIMITS
220 static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
221 static mach_port_t task_allocate_fatal_port(void);
222 
223 IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
224     .iko_op_stable     = true,
225     .iko_op_no_senders = task_fatal_port_no_senders);
226 
227 extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
228 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
229 
230 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
231 int audio_active = 0;
232 
/*
 *	structure for tracking zone usage
 *	Used either one per task/thread for all zones or <per-task,per-zone>.
 */
typedef struct zinfo_usage_store_t {
	/* These fields may be updated atomically, and so must be 8 byte aligned */
	uint64_t        alloc __attribute__((aligned(8)));              /* allocation counter */
	uint64_t        free __attribute__((aligned(8)));               /* free counter */
} zinfo_usage_store_t;

/* System-wide accumulators of kernel-memory allocation/free counts (private vs. shared). */
zinfo_usage_store_t tasks_tkm_private;
zinfo_usage_store_t tasks_tkm_shared;
245 
246 /* A container to accumulate statistics for expired tasks */
247 expired_task_statistics_t               dead_task_statistics;
248 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
249 
250 ledger_template_t task_ledger_template = NULL;
251 
252 /* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
253 LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
254 LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
255 
256 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
257 {.cpu_time = -1,
258  .tkm_private = -1,
259  .tkm_shared = -1,
260  .phys_mem = -1,
261  .wired_mem = -1,
262  .internal = -1,
263  .iokit_mapped = -1,
264  .external = -1,
265  .reusable = -1,
266  .alternate_accounting = -1,
267  .alternate_accounting_compressed = -1,
268  .page_table = -1,
269  .phys_footprint = -1,
270  .internal_compressed = -1,
271  .purgeable_volatile = -1,
272  .purgeable_nonvolatile = -1,
273  .purgeable_volatile_compressed = -1,
274  .purgeable_nonvolatile_compressed = -1,
275  .tagged_nofootprint = -1,
276  .tagged_footprint = -1,
277  .tagged_nofootprint_compressed = -1,
278  .tagged_footprint_compressed = -1,
279  .network_volatile = -1,
280  .network_nonvolatile = -1,
281  .network_volatile_compressed = -1,
282  .network_nonvolatile_compressed = -1,
283  .media_nofootprint = -1,
284  .media_footprint = -1,
285  .media_nofootprint_compressed = -1,
286  .media_footprint_compressed = -1,
287  .graphics_nofootprint = -1,
288  .graphics_footprint = -1,
289  .graphics_nofootprint_compressed = -1,
290  .graphics_footprint_compressed = -1,
291  .neural_nofootprint = -1,
292  .neural_footprint = -1,
293  .neural_nofootprint_compressed = -1,
294  .neural_footprint_compressed = -1,
295  .platform_idle_wakeups = -1,
296  .interrupt_wakeups = -1,
297 #if CONFIG_SCHED_SFI
298  .sfi_wait_times = { 0 /* initialized at runtime */},
299 #endif /* CONFIG_SCHED_SFI */
300  .cpu_time_billed_to_me = -1,
301  .cpu_time_billed_to_others = -1,
302  .physical_writes = -1,
303  .logical_writes = -1,
304  .logical_writes_to_external = -1,
305 #if DEBUG || DEVELOPMENT
306  .pages_grabbed = -1,
307  .pages_grabbed_kern = -1,
308  .pages_grabbed_iopl = -1,
309  .pages_grabbed_upl = -1,
310 #endif
311 #if CONFIG_FREEZE
312  .frozen_to_swap = -1,
313 #endif /* CONFIG_FREEZE */
314  .energy_billed_to_me = -1,
315  .energy_billed_to_others = -1,
316 #if CONFIG_PHYS_WRITE_ACCT
317  .fs_metadata_writes = -1,
318 #endif /* CONFIG_PHYS_WRITE_ACCT */
319 #if CONFIG_MEMORYSTATUS
320  .memorystatus_dirty_time = -1,
321 #endif /* CONFIG_MEMORYSTATUS */
322  .swapins = -1, };
323 
324 /* System sleep state */
325 boolean_t tasks_suspend_state;
326 
327 
328 void init_task_ledgers(void);
329 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
330 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
331 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
332 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
333 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
334 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
335 #if CONFIG_PROC_RESOURCE_LIMITS
336 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
337 mach_port_name_t current_task_get_fatal_port_name(void);
338 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
339 
340 kern_return_t task_suspend_internal(task_t);
341 kern_return_t task_resume_internal(task_t);
342 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
343 
344 extern kern_return_t iokit_task_terminate(task_t task);
345 extern void          iokit_task_app_suspended_changed(task_t task);
346 
347 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
348 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
349 extern kern_return_t thread_resume(thread_t thread);
350 
351 extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
352 
353 // Warn tasks when they hit 80% of their memory limit.
354 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
355 
356 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
357 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
358 
359 /*
360  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
361  *
362  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
363  *  stacktraces, aka micro-stackshots)
364  */
365 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
366 
367 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
368 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
369 
370 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
371 
372 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
373 
374 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
375 unsigned int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
376 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
377 
378 /* I/O Monitor Limits */
379 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
380 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
381 
382 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
383 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
384 
385 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
386 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
387 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
388 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage*/
389 static boolean_t global_update_logical_writes(int64_t, int64_t*);
390 
391 #define TASK_MAX_THREAD_LIMIT 256
392 
393 #if MACH_ASSERT
394 int pmap_ledgers_panic = 1;
395 int pmap_ledgers_panic_leeway = 3;
396 #endif /* MACH_ASSERT */
397 
398 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
399 
400 #if CONFIG_COREDUMP
401 int hwm_user_cores = 0; /* high watermark violations generate user core files */
402 #endif
403 
404 #ifdef MACH_BSD
405 extern uint32_t proc_platform(const struct proc *);
406 extern uint32_t proc_sdk(struct proc *);
407 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
408 extern int      proc_pid(struct proc *p);
409 extern int      proc_selfpid(void);
410 extern struct proc *current_proc(void);
411 extern char     *proc_name_address(struct proc *p);
412 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
413 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
414 extern void workq_proc_suspended(struct proc *p);
415 extern void workq_proc_resumed(struct proc *p);
416 extern struct proc *kernproc;
417 
418 #if CONFIG_MEMORYSTATUS
419 extern void     proc_memstat_skip(struct proc* p, boolean_t set);
420 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
421 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
422 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
423 extern uint64_t  memorystatus_available_memory_internal(struct proc *p);
424 
425 #if DEVELOPMENT || DEBUG
426 extern void memorystatus_abort_vm_map_fork(task_t);
427 #endif
428 
429 #endif /* CONFIG_MEMORYSTATUS */
430 
431 #endif /* MACH_BSD */
432 
433 #if DEVELOPMENT || DEBUG
434 int exc_resource_threads_enabled;
435 #endif /* DEVELOPMENT || DEBUG */
436 
437 /* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
438 static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
439 
440 /*
441  * Defaults for controllable EXC_GUARD behaviors
442  *
443  * Internal builds are fatal by default (except BRIDGE).
444  * Create an alternate set of defaults for special processes by name.
445  */
446 struct task_exc_guard_named_default {
447 	char *name;
448 	uint32_t behavior;
449 };
450 #define _TASK_EXC_GUARD_MP_CORPSE  (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
451 #define _TASK_EXC_GUARD_MP_ONCE    (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
452 #define _TASK_EXC_GUARD_MP_FATAL   (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)
453 
454 #define _TASK_EXC_GUARD_VM_CORPSE  (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE)
455 #define _TASK_EXC_GUARD_VM_ONCE    (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
456 #define _TASK_EXC_GUARD_VM_FATAL   (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)
457 
458 #define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
459 #define _TASK_EXC_GUARD_ALL_ONCE   (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
460 #define _TASK_EXC_GUARD_ALL_FATAL  (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
461 
462 /* cannot turn off FATAL and DELIVER bit if set */
463 uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL | TASK_EXC_GUARD_VM_FATAL |
464     TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_VM_DELIVER;
465 /* cannot turn on ONCE bit if unset */
466 uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_VM_ONCE;
467 
468 #if !defined(XNU_TARGET_OS_BRIDGE)
469 
470 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
471 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
472 /*
473  * These "by-process-name" default overrides are intended to be a short-term fix to
474  * quickly get over races between changes introducing new EXC_GUARD raising behaviors
475  * in some process and a change in default behavior for same. We should ship with
476  * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
477  * exception behavior via task_set_exc_guard_behavior()).
478  *
479  * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
480  * task_exc_guard_default when transitioning this list between empty and
481  * non-empty.
482  */
483 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
484 
485 #else /* !defined(XNU_TARGET_OS_BRIDGE) */
486 
487 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
488 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
489 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
490 
491 #endif /* !defined(XNU_TARGET_OS_BRIDGE) */
492 
493 /* Forwards */
494 
495 static void task_hold_locked(task_t task);
496 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
497 static void task_release_locked(task_t task);
498 extern task_t proc_get_task_raw(void *proc);
499 extern void task_ref_hold_proc_task_struct(task_t task);
500 extern void task_release_proc_task_struct(task_t task);
501 
502 static void task_synchronizer_destroy_all(task_t task);
503 static os_ref_count_t
504 task_add_turnstile_watchports_locked(
505 	task_t                      task,
506 	struct task_watchports      *watchports,
507 	struct task_watchport_elem  **previous_elem_array,
508 	ipc_port_t                  *portwatch_ports,
509 	uint32_t                    portwatch_count);
510 
511 static os_ref_count_t
512 task_remove_turnstile_watchports_locked(
513 	task_t                 task,
514 	struct task_watchports *watchports,
515 	ipc_port_t             *port_freelist);
516 
517 static struct task_watchports *
518 task_watchports_alloc_init(
519 	task_t        task,
520 	thread_t      thread,
521 	uint32_t      count);
522 
523 static void
524 task_watchports_deallocate(
525 	struct task_watchports *watchports);
526 
527 void
task_set_64bit(task_t task,boolean_t is_64bit,boolean_t is_64bit_data)528 task_set_64bit(
529 	task_t task,
530 	boolean_t is_64bit,
531 	boolean_t is_64bit_data)
532 {
533 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
534 	thread_t thread;
535 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
536 
537 	task_lock(task);
538 
539 	/*
540 	 * Switching to/from 64-bit address spaces
541 	 */
542 	if (is_64bit) {
543 		if (!task_has_64Bit_addr(task)) {
544 			task_set_64Bit_addr(task);
545 		}
546 	} else {
547 		if (task_has_64Bit_addr(task)) {
548 			task_clear_64Bit_addr(task);
549 		}
550 	}
551 
552 	/*
553 	 * Switching to/from 64-bit register state.
554 	 */
555 	if (is_64bit_data) {
556 		if (task_has_64Bit_data(task)) {
557 			goto out;
558 		}
559 
560 		task_set_64Bit_data(task);
561 	} else {
562 		if (!task_has_64Bit_data(task)) {
563 			goto out;
564 		}
565 
566 		task_clear_64Bit_data(task);
567 	}
568 
569 	/* FIXME: On x86, the thread save state flavor can diverge from the
570 	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
571 	 * state dichotomy. Since we can be pre-empted in this interval,
572 	 * certain routines may observe the thread as being in an inconsistent
573 	 * state with respect to its task's 64-bitness.
574 	 */
575 
576 #if defined(__x86_64__) || defined(__arm64__)
577 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
578 		thread_mtx_lock(thread);
579 		machine_thread_switch_addrmode(thread);
580 		thread_mtx_unlock(thread);
581 	}
582 #endif /* defined(__x86_64__) || defined(__arm64__) */
583 
584 out:
585 	task_unlock(task);
586 }
587 
588 bool
task_get_64bit_addr(task_t task)589 task_get_64bit_addr(task_t task)
590 {
591 	return task_has_64Bit_addr(task);
592 }
593 
594 bool
task_get_64bit_data(task_t task)595 task_get_64bit_data(task_t task)
596 {
597 	return task_has_64Bit_data(task);
598 }
599 
600 void
task_set_platform_binary(task_t task,boolean_t is_platform)601 task_set_platform_binary(
602 	task_t task,
603 	boolean_t is_platform)
604 {
605 	if (is_platform) {
606 		task_ro_flags_set(task, TFRO_PLATFORM);
607 	} else {
608 		task_ro_flags_clear(task, TFRO_PLATFORM);
609 	}
610 }
611 
612 boolean_t
task_get_platform_binary(task_t task)613 task_get_platform_binary(task_t task)
614 {
615 	return (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
616 }
617 
618 boolean_t
task_is_a_corpse(task_t task)619 task_is_a_corpse(task_t task)
620 {
621 	return (task_ro_flags_get(task) & TFRO_CORPSE) != 0;
622 }
623 
624 void
task_set_corpse(task_t task)625 task_set_corpse(task_t task)
626 {
627 	return task_ro_flags_set(task, TFRO_CORPSE);
628 }
629 
/*
 * Mark the task's ports immovable/pinned; thin wrapper that defers
 * the real work to the IPC layer.
 */
void
task_set_immovable_pinned(task_t task)
{
	ipc_task_set_immovable_pinned(task);
}
635 
636 /*
637  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
638  * Returns "false" if flag is already set, and "true" in other cases.
639  */
640 bool
task_set_ca_client_wi(task_t task,boolean_t set_or_clear)641 task_set_ca_client_wi(
642 	task_t task,
643 	boolean_t set_or_clear)
644 {
645 	bool ret = true;
646 	task_lock(task);
647 	if (set_or_clear) {
648 		/* Tasks can have only one CA_CLIENT work interval */
649 		if (task->t_flags & TF_CA_CLIENT_WI) {
650 			ret = false;
651 		} else {
652 			task->t_flags |= TF_CA_CLIENT_WI;
653 		}
654 	} else {
655 		task->t_flags &= ~TF_CA_CLIENT_WI;
656 	}
657 	task_unlock(task);
658 	return ret;
659 }
660 
661 /*
662  * task_set_dyld_info() is called at most three times.
663  * 1) at task struct creation to set addr/size to zero.
664  * 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
665  * 3) is from dyld itself to update location of all_image_info
666  * For security any calls after that are ignored.  The TF_DYLD_ALL_IMAGE_SET bit is used to determine state.
667  */
668 kern_return_t
task_set_dyld_info(task_t task,mach_vm_address_t addr,mach_vm_size_t size)669 task_set_dyld_info(
670 	task_t            task,
671 	mach_vm_address_t addr,
672 	mach_vm_size_t    size)
673 {
674 	mach_vm_address_t end;
675 	if (os_add_overflow(addr, size, &end)) {
676 		return KERN_FAILURE;
677 	}
678 
679 	task_lock(task);
680 	/* don't accept updates if all_image_info_addr is final */
681 	if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == 0) {
682 		bool inputNonZero   = ((addr != 0) || (size != 0));
683 		bool currentNonZero = ((task->all_image_info_addr != 0) || (task->all_image_info_size != 0));
684 		task->all_image_info_addr = addr;
685 		task->all_image_info_size = size;
686 		/* can only change from a non-zero value to another non-zero once */
687 		if (inputNonZero && currentNonZero) {
688 			task->t_flags |= TF_DYLD_ALL_IMAGE_FINAL;
689 		}
690 		task_unlock(task);
691 		return KERN_SUCCESS;
692 	} else {
693 		task_unlock(task);
694 		return KERN_FAILURE;
695 	}
696 }
697 
/* Report the task's donates_own_pages flag (set elsewhere; semantics
 * defined at the flag's point of assignment). */
bool
task_donates_own_pages(
	task_t task)
{
	return task->donates_own_pages;
}
704 
/* Record the VM address of the task's mach header, under the task lock. */
void
task_set_mach_header_address(
	task_t task,
	mach_vm_address_t addr)
{
	task_lock(task);
	task->mach_header_vm_address = addr;
	task_unlock(task);
}
714 
715 void
task_bank_reset(__unused task_t task)716 task_bank_reset(__unused task_t task)
717 {
718 	if (task->bank_context != NULL) {
719 		bank_task_destroy(task);
720 	}
721 }
722 
/*
 * NOTE: This should only be called when the P_LINTRANSIT
 *	 flag is set (the proc_trans lock is held) on the
 *	 proc associated with the task.
 */
/* Create the task's bank context; the task must not already have one. */
void
task_bank_init(__unused task_t task)
{
	/* double-initialization indicates a caller bug — fail loudly */
	if (task->bank_context != NULL) {
		panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
	}
	bank_task_initialize(task);
}
736 
/* Mark the task as having performed an exec (TPF_DID_EXEC). */
void
task_set_did_exec_flag(task_t task)
{
	task->t_procflags |= TPF_DID_EXEC;
}
742 
/* Clear the exec-copy marker (TPF_EXEC_COPY) from the task. */
void
task_clear_exec_copy_flag(task_t task)
{
	task->t_procflags &= ~TPF_EXEC_COPY;
}
748 
/*
 * Event used to park/wake threads in task_wait_to_return() /
 * task_clear_return_wait(); the address of returnwait_inheritor
 * doubles as the wait event.
 */
event_t
task_get_return_wait_event(task_t task)
{
	return (event_t)&task->returnwait_inheritor;
}
754 
/*
 * Release threads blocked in task_wait_to_return().
 *
 * TCRW_CLEAR_INITIAL_WAIT: plain wakeup on the return-wait event.
 * TCRW_CLEAR_FINAL_WAIT:   clear TRW_LRETURNWAIT and the inheritor under
 * the itk_space write lock; if a waiter is parked on the turnstile
 * (TRW_LRETURNWAITER), wake all waiters and retire the turnstile.
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		/* itk_space write lock serializes against task_wait_to_return() */
		is_write_lock(task->itk_space);

		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
			    TURNSTILE_ULOCK);

			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			/* drop the turnstile reference taken by turnstile_prepare_hash() */
			turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
785 
/*
 * Block the current thread until the task's return-wait is cleared
 * (TRW_LRETURNWAIT dropped by task_clear_return_wait()), then return
 * to user space via thread_bootstrap_return().  Does not return to the
 * caller.  Uses a turnstile so the wait inherits priority from
 * returnwait_inheritor.
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();

	/* itk_space write lock guards t_returnwaitflags / the inheritor */
	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
		    TURNSTILE_ULOCK);

		do {
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			/* re-take the lock to re-check the flag; spurious wakeups loop */
			is_write_lock(task->itk_space);
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
	}

	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process
	 * to execute any code, make sure its credentials are cached,
	 * and notify any interested parties.
	 */
	extern void mach_kauth_cred_thread_update(void);

	mach_kauth_cred_thread_update();
	mac_proc_notify_exec_complete(current_proc());
#endif

	thread_bootstrap_return();
}
836 
/* Report whether this task is the transient copy created for an exec. */
boolean_t
task_is_exec_copy(task_t task)
{
	return task_is_exec_copy_internal(task);
}
842 
/* Report whether this task has performed an exec. */
boolean_t
task_did_exec(task_t task)
{
	return task_did_exec_internal(task);
}
848 
849 boolean_t
task_is_active(task_t task)850 task_is_active(task_t task)
851 {
852 	return task->active;
853 }
854 
855 boolean_t
task_is_halting(task_t task)856 task_is_halting(task_t task)
857 {
858 	return task->halting;
859 }
860 
/*
 * task_init:
 *
 * Boot-time initialization of the task subsystem.  Reads the footprint
 * and monitor tunables from boot-args / the device tree, ensures the
 * task ledger template exists (coalition_init() builds it when
 * CONFIG_COALITIONS is on; otherwise we build it here), creates the
 * proc_task zone, and finally creates kernel_task itself.
 *
 * Ordering matters: the ledger template and proc_task zone must exist
 * before task_create_internal() runs for kernel_task.
 */
void
task_init(void)
{
	/*
	 * Configure per-task memory limit.
	 * The boot-arg is interpreted as Megabytes,
	 * and takes precedence over the device tree.
	 * Setting the boot-arg to 0 disables task limits.
	 */
	if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
	    sizeof(max_task_footprint_mb))) {
		/*
		 * No limit was found in boot-args, so go look in the device tree.
		 */
		if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
		    sizeof(max_task_footprint_mb))) {
			/*
			 * No limit was found in device tree.
			 */
			max_task_footprint_mb = 0;
		}
	}

	if (max_task_footprint_mb != 0) {
#if CONFIG_MEMORYSTATUS
		/* Clamp to a 50 MB floor so a bogus boot-arg can't starve tasks. */
		if (max_task_footprint_mb < 50) {
			printf("Warning: max_task_pmem %d below minimum.\n",
			    max_task_footprint_mb);
			max_task_footprint_mb = 50;
		}
		printf("Limiting task physical memory footprint to %d MB\n",
		    max_task_footprint_mb);

		max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024;         // Convert MB to bytes

		/*
		 * Configure the per-task memory limit warning level.
		 * This is computed as a percentage.
		 */
		max_task_footprint_warning_level = 0;

		if (max_mem < 0x40000000) {
			/*
			 * On devices with < 1GB of memory:
			 *    -- set warnings to 50MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 50) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
			}
		} else {
			/*
			 * On devices with >= 1GB of memory:
			 *    -- set warnings to 100MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 100) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
			}
		}

		/*
		 * Never allow warning level to land below the default.
		 */
		if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
			max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
		}

		printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);

#else
		/* Footprint limits require jetsam (CONFIG_MEMORYSTATUS). */
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
#endif /* CONFIG_MEMORYSTATUS */
	}

#if DEVELOPMENT || DEBUG
	/* EXC_RESOURCE-for-threads and exception-guard debug tunables. */
	if (!PE_parse_boot_argn("exc_resource_threads",
	    &exc_resource_threads_enabled,
	    sizeof(exc_resource_threads_enabled))) {
		exc_resource_threads_enabled = 1;
	}
	PE_parse_boot_argn("task_exc_guard_default",
	    &task_exc_guard_default,
	    sizeof(task_exc_guard_default));
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_COREDUMP
	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
	    sizeof(hwm_user_cores))) {
		hwm_user_cores = 0;
	}
#endif

	proc_init_cpumon_params();

	/* Wakeups-monitor tunables; fall back to defaults when no boot-arg. */
	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
	    sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
	}

	if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
	    sizeof(disable_exc_resource))) {
		disable_exc_resource = 0;
	}

	/* I/O-monitor and I/O-telemetry tunables. */
	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
		task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
		task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
		io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
	}

/*
 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 * sets up the ledgers for the default coalition. If we don't have coalitions,
 * then we have to call it now.
 */
#if CONFIG_COALITIONS
	assert(task_ledger_template);
#else /* CONFIG_COALITIONS */
	init_task_ledgers();
#endif /* CONFIG_COALITIONS */

	task_ref_init();
	task_zone_init();

#ifdef __LP64__
	boolean_t is_64bit = TRUE;
#else
	boolean_t is_64bit = FALSE;
#endif

	/* kernproc and kernel_task are the two halves of one proc_task zone element. */
	kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK | Z_ZERO);
	kernel_task = proc_get_task_raw(kernproc);

	/*
	 * Create the kernel task as the first task.
	 */
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, is_64bit,
	    is_64bit, TF_NONE, TF_NONE, TPF_NONE, TWF_NONE, kernel_task) != KERN_SUCCESS) {
		panic("task_init");
	}

	ipc_task_enable(kernel_task);

#if defined(HAS_APPLE_PAC)
	kernel_task->rop_pid = ml_default_rop_pid();
	kernel_task->jop_pid = ml_default_jop_pid();
	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
	ml_task_set_disable_user_jop(kernel_task, FALSE);
#endif

	/* Swap the map task_create_internal built for the real kernel_map. */
	vm_map_deallocate(kernel_task->map);
	kernel_task->map = kernel_map;
}
1028 
/*
 * task_zone_init:
 *
 * Create the combined proc_task zone.  A proc and its task are carved
 * out of a single zone element (proc_struct_size + task_struct_size);
 * each half is rounded up to the other's alignment so both structures
 * are properly aligned within the element.
 */
static inline void
task_zone_init(void)
{
	proc_struct_size = roundup(proc_struct_size, task_alignment);
	task_struct_size = roundup(sizeof(struct task), proc_alignment);
	proc_and_task_size = proc_struct_size + task_struct_size;

	proc_task_zone = zone_create_ext("proc_task", proc_and_task_size,
	    ZC_ZFREE_CLEARMEM | ZC_SEQUESTER, ZONE_ID_PROC_TASK, NULL); /* sequester is needed for proc_rele() */
}
1039 
1040 /*
1041  * Task ledgers
1042  * ------------
1043  *
1044  * phys_footprint
1045  *   Physical footprint: This is the sum of:
1046  *     + (internal - alternate_accounting)
1047  *     + (internal_compressed - alternate_accounting_compressed)
1048  *     + iokit_mapped
1049  *     + purgeable_nonvolatile
1050  *     + purgeable_nonvolatile_compressed
1051  *     + page_table
1052  *
1053  * internal
1054  *   The task's anonymous memory, which on iOS is always resident.
1055  *
1056  * internal_compressed
1057  *   Amount of this task's internal memory which is held by the compressor.
1058  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1059  *   and could be either decompressed back into memory, or paged out to storage, depending
1060  *   on our implementation.
1061  *
 * iokit_mapped
 *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 *   clean/dirty or internal/external state.
1065  *
1066  * alternate_accounting
1067  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1068  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1069  *   double counting.
1070  *
1071  * pages_grabbed
1072  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1073  *   which track UPL, IOPL and Kernel page grabs.
1074  */
/*
 * init_task_ledgers:
 *
 * Build the per-task ledger template: register every ledger entry
 * (CPU time, memory accounting, power, I/O, SFI wait times, ...),
 * verify that all registrations succeeded, then configure per-entry
 * tracking modes, debug panics, and threshold callbacks.
 *
 * Runs exactly once at boot, before any task (including kernel_task)
 * exists; the resulting template is stored in task_ledger_template and
 * instantiated per task in task_create_internal().
 */
void
init_task_ledgers(void)
{
	ledger_template_t t;

	assert(task_ledger_template == NULL);
	assert(kernel_task == TASK_NULL);

#if MACH_ASSERT
	/* Debug tunables: panic when pmap-maintained ledgers go negative. */
	PE_parse_boot_argn("pmap_ledgers_panic",
	    &pmap_ledgers_panic,
	    sizeof(pmap_ledgers_panic));
	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
	    &pmap_ledgers_panic_leeway,
	    sizeof(pmap_ledgers_panic_leeway));
#endif /* MACH_ASSERT */

	if ((t = ledger_template_create("Per-task ledger")) == NULL) {
		panic("couldn't create task ledger template");
	}

	/*
	 * Register the entries.  ledger_entry_add() returns the entry's
	 * index within the template (negative on failure); the indices are
	 * cached in the global task_ledgers struct.
	 */
	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
	    "physmem", "bytes");
	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
	    "bytes");
	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
	    "bytes");
	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
	    "bytes");
	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
	    "bytes");
	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(t, "iokit_mapped", "mappings",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(t, "alternate_accounting", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(t, "alternate_accounting_compressed", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.page_table = ledger_entry_add_with_flags(t, "page_table", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
	    "bytes");
	task_ledgers.reusable = ledger_entry_add(t, "reusable", "physmem", "bytes");
	task_ledgers.external = ledger_entry_add(t, "external", "physmem", "bytes");
	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(t, "purgeable_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(t, "purgeable_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(t, "purgeable_volatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(t, "purgeable_nonvolatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#if DEBUG || DEVELOPMENT
	/* Page-grab counters (total plus kern/IOPL/UPL breakdown); debug builds only. */
	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#endif
	/* Tagged/network/media/graphics/neural memory, each with footprint,
	 * no-footprint, and compressed variants. */
	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(t, "tagged_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(t, "tagged_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(t, "tagged_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(t, "tagged_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile = ledger_entry_add_with_flags(t, "network_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(t, "network_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(t, "network_volatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(t, "network_nonvolatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(t, "media_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint = ledger_entry_add_with_flags(t, "media_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(t, "media_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(t, "media_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(t, "graphics_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(t, "graphics_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(t, "graphics_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(t, "graphics_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(t, "neural_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint = ledger_entry_add_with_flags(t, "neural_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(t, "neural_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(t, "neural_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

#if CONFIG_FREEZE
	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
#endif /* CONFIG_FREEZE */

	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");

#if CONFIG_SCHED_SFI
	/*
	 * One wait-time entry per SFI class; classes that alias to the same
	 * ledger share a single entry.  -1 marks "not yet registered".
	 */
	sfi_class_id_t class_id, ledger_alias;
	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		task_ledgers.sfi_wait_times[class_id] = -1;
	}

	/* don't account for UNSPECIFIED */
	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
			/* Check to see if alias has been registered yet */
			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
			} else {
				/* Otherwise, initialize it first */
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
			}
		} else {
			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
		}

		if (task_ledgers.sfi_wait_times[class_id] < 0) {
			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
		}
	}

	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
#endif /* CONFIG_SCHED_SFI */

	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
	task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
#if CONFIG_PHYS_WRITE_ACCT
	task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
#endif /* CONFIG_PHYS_WRITE_ACCT */
	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");

#if CONFIG_MEMORYSTATUS
	task_ledgers.memorystatus_dirty_time = ledger_entry_add(t, "memorystatus_dirty_time", "physmem", "ns");
#endif /* CONFIG_MEMORYSTATUS */

	task_ledgers.swapins = ledger_entry_add_with_flags(t, "swapins", "physmem", "bytes",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

	/* A negative index means an entry failed to register; any failure is fatal. */
	if ((task_ledgers.cpu_time < 0) ||
	    (task_ledgers.tkm_private < 0) ||
	    (task_ledgers.tkm_shared < 0) ||
	    (task_ledgers.phys_mem < 0) ||
	    (task_ledgers.wired_mem < 0) ||
	    (task_ledgers.internal < 0) ||
	    (task_ledgers.external < 0) ||
	    (task_ledgers.reusable < 0) ||
	    (task_ledgers.iokit_mapped < 0) ||
	    (task_ledgers.alternate_accounting < 0) ||
	    (task_ledgers.alternate_accounting_compressed < 0) ||
	    (task_ledgers.page_table < 0) ||
	    (task_ledgers.phys_footprint < 0) ||
	    (task_ledgers.internal_compressed < 0) ||
	    (task_ledgers.purgeable_volatile < 0) ||
	    (task_ledgers.purgeable_nonvolatile < 0) ||
	    (task_ledgers.purgeable_volatile_compressed < 0) ||
	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
	    (task_ledgers.tagged_nofootprint < 0) ||
	    (task_ledgers.tagged_footprint < 0) ||
	    (task_ledgers.tagged_nofootprint_compressed < 0) ||
	    (task_ledgers.tagged_footprint_compressed < 0) ||
#if CONFIG_FREEZE
	    (task_ledgers.frozen_to_swap < 0) ||
#endif /* CONFIG_FREEZE */
	    (task_ledgers.network_volatile < 0) ||
	    (task_ledgers.network_nonvolatile < 0) ||
	    (task_ledgers.network_volatile_compressed < 0) ||
	    (task_ledgers.network_nonvolatile_compressed < 0) ||
	    (task_ledgers.media_nofootprint < 0) ||
	    (task_ledgers.media_footprint < 0) ||
	    (task_ledgers.media_nofootprint_compressed < 0) ||
	    (task_ledgers.media_footprint_compressed < 0) ||
	    (task_ledgers.graphics_nofootprint < 0) ||
	    (task_ledgers.graphics_footprint < 0) ||
	    (task_ledgers.graphics_nofootprint_compressed < 0) ||
	    (task_ledgers.graphics_footprint_compressed < 0) ||
	    (task_ledgers.neural_nofootprint < 0) ||
	    (task_ledgers.neural_footprint < 0) ||
	    (task_ledgers.neural_nofootprint_compressed < 0) ||
	    (task_ledgers.neural_footprint_compressed < 0) ||
	    (task_ledgers.platform_idle_wakeups < 0) ||
	    (task_ledgers.interrupt_wakeups < 0) ||
	    (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
	    (task_ledgers.physical_writes < 0) ||
	    (task_ledgers.logical_writes < 0) ||
	    (task_ledgers.logical_writes_to_external < 0) ||
#if CONFIG_PHYS_WRITE_ACCT
	    (task_ledgers.fs_metadata_writes < 0) ||
#endif /* CONFIG_PHYS_WRITE_ACCT */
#if CONFIG_MEMORYSTATUS
	    (task_ledgers.memorystatus_dirty_time < 0) ||
#endif /* CONFIG_MEMORYSTATUS */
	    (task_ledgers.energy_billed_to_me < 0) ||
	    (task_ledgers.energy_billed_to_others < 0) ||
	    (task_ledgers.swapins < 0)
	    ) {
		panic("couldn't create entries for task ledger template");
	}

	/* These entries only track the credit side. */
	ledger_track_credit_only(t, task_ledgers.phys_footprint);
	ledger_track_credit_only(t, task_ledgers.internal);
	ledger_track_credit_only(t, task_ledgers.external);
	ledger_track_credit_only(t, task_ledgers.reusable);

	/*
	 * Track maxima for the footprint-related entries.  NOTE(review): the
	 * 60 argument is presumably a time period for the maximum tracking --
	 * confirm against ledger_track_maximum().
	 */
	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
	ledger_track_maximum(t, task_ledgers.phys_mem, 60);
	ledger_track_maximum(t, task_ledgers.internal, 60);
	ledger_track_maximum(t, task_ledgers.internal_compressed, 60);
	ledger_track_maximum(t, task_ledgers.reusable, 60);
	ledger_track_maximum(t, task_ledgers.external, 60);
#if MACH_ASSERT
	if (pmap_ledgers_panic) {
		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
		ledger_panic_on_negative(t, task_ledgers.page_table);
		ledger_panic_on_negative(t, task_ledgers.internal);
		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
#if CONFIG_PHYS_WRITE_ACCT
		ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
#endif /* CONFIG_PHYS_WRITE_ACCT */

		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_volatile);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.media_footprint);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
	}
#endif /* MACH_ASSERT */

	/* Threshold callbacks: footprint limit, wakeups rate, physical-write rate. */
#if CONFIG_MEMORYSTATUS
	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
#endif /* CONFIG_MEMORYSTATUS */

	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
	    task_wakeups_rate_exceeded, NULL, NULL);
	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);

#if !XNU_MONITOR
	ledger_template_complete(t);
#else /* !XNU_MONITOR */
	ledger_template_complete_secure_alloc(t);
#endif /* XNU_MONITOR */
	task_ledger_template = t;
}
1333 
1334 /* Create a task, but leave the task ports disabled */
1335 kern_return_t
task_create_internal(task_t parent_task,proc_ro_t proc_ro,coalition_t * parent_coalitions __unused,boolean_t inherit_memory,boolean_t is_64bit,boolean_t is_64bit_data,uint32_t t_flags,uint32_t t_flags_ro,uint32_t t_procflags,uint8_t t_returnwaitflags,task_t child_task)1336 task_create_internal(
1337 	task_t             parent_task,            /* Null-able */
1338 	proc_ro_t          proc_ro,
1339 	coalition_t        *parent_coalitions __unused,
1340 	boolean_t          inherit_memory,
1341 	boolean_t          is_64bit,
1342 	boolean_t          is_64bit_data,
1343 	uint32_t           t_flags,
1344 	uint32_t           t_flags_ro,
1345 	uint32_t           t_procflags,
1346 	uint8_t            t_returnwaitflags,
1347 	task_t             child_task)
1348 {
1349 	task_t                  new_task;
1350 	vm_shared_region_t      shared_region;
1351 	ledger_t                ledger = NULL;
1352 	struct task_ro_data     task_ro_data = {};
1353 	uint32_t                parent_t_flags_ro = 0;
1354 
1355 	new_task = child_task;
1356 
1357 	if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1358 		return KERN_RESOURCE_SHORTAGE;
1359 	}
1360 
1361 	/* allocate with active entries */
1362 	assert(task_ledger_template != NULL);
1363 	ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1364 	if (ledger == NULL) {
1365 		task_ref_count_fini(new_task);
1366 		return KERN_RESOURCE_SHORTAGE;
1367 	}
1368 
1369 	counter_alloc(&(new_task->faults));
1370 
1371 #if defined(HAS_APPLE_PAC)
1372 	ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1373 	ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
1374 	ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1375 #endif
1376 
1377 
1378 	new_task->ledger = ledger;
1379 
1380 	/* if inherit_memory is true, parent_task MUST not be NULL */
1381 	if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1382 #if CONFIG_DEFERRED_RECLAIM
1383 		if (parent_task->deferred_reclamation_metadata) {
1384 			/*
1385 			 * Prevent concurrent reclaims while we're forking the parent_task's map,
1386 			 * so that the child's map is in sync with the forked reclamation
1387 			 * metadata.
1388 			 */
1389 			vm_deferred_reclamation_buffer_lock(parent_task->deferred_reclamation_metadata);
1390 		}
1391 #endif /* CONFIG_DEFERRED_RECLAIM */
1392 		new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1393 #if CONFIG_DEFERRED_RECLAIM
1394 		if (parent_task->deferred_reclamation_metadata) {
1395 			new_task->deferred_reclamation_metadata =
1396 			    vm_deferred_reclamation_buffer_fork(new_task, parent_task->deferred_reclamation_metadata);
1397 		}
1398 #endif /* CONFIG_DEFERRED_RECLAIM */
1399 	} else {
1400 		unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1401 		pmap_t pmap = pmap_create_options(ledger, 0, pmap_flags);
1402 		vm_map_t new_map;
1403 
1404 		if (pmap == NULL) {
1405 			counter_free(&new_task->faults);
1406 			ledger_dereference(ledger);
1407 			task_ref_count_fini(new_task);
1408 			return KERN_RESOURCE_SHORTAGE;
1409 		}
1410 		new_map = vm_map_create_options(pmap,
1411 		    (vm_map_offset_t)(VM_MIN_ADDRESS),
1412 		    (vm_map_offset_t)(VM_MAX_ADDRESS),
1413 		    VM_MAP_CREATE_PAGEABLE);
1414 		if (parent_task) {
1415 			vm_map_inherit_limits(new_map, parent_task->map);
1416 		}
1417 		new_task->map = new_map;
1418 	}
1419 
1420 	if (new_task->map == NULL) {
1421 		counter_free(&new_task->faults);
1422 		ledger_dereference(ledger);
1423 		task_ref_count_fini(new_task);
1424 		return KERN_RESOURCE_SHORTAGE;
1425 	}
1426 
1427 #if defined(CONFIG_SCHED_MULTIQ)
1428 	new_task->sched_group = sched_group_create();
1429 #endif
1430 
1431 	lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1432 	queue_init(&new_task->threads);
1433 	new_task->suspend_count = 0;
1434 	new_task->thread_count = 0;
1435 	new_task->active_thread_count = 0;
1436 	new_task->user_stop_count = 0;
1437 	new_task->legacy_stop_count = 0;
1438 	new_task->active = TRUE;
1439 	new_task->halting = FALSE;
1440 	new_task->priv_flags = 0;
1441 	new_task->t_flags = t_flags;
1442 	task_ro_data.t_flags_ro = t_flags_ro;
1443 	new_task->t_procflags = t_procflags;
1444 	new_task->t_returnwaitflags = t_returnwaitflags;
1445 	new_task->returnwait_inheritor = current_thread();
1446 	new_task->importance = 0;
1447 	new_task->crashed_thread_id = 0;
1448 	new_task->watchports = NULL;
1449 	new_task->t_rr_ranges = NULL;
1450 
1451 	new_task->bank_context = NULL;
1452 
1453 	if (parent_task) {
1454 		parent_t_flags_ro = task_ro_flags_get(parent_task);
1455 	}
1456 
1457 #if __has_feature(ptrauth_calls)
1458 	/* Inherit the pac exception flags from parent if in fork */
1459 	if (parent_task && inherit_memory) {
1460 		task_ro_data.t_flags_ro |= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE |
1461 		    TFRO_PAC_EXC_FATAL));
1462 	}
1463 #endif
1464 
1465 #ifdef MACH_BSD
1466 	new_task->corpse_info = NULL;
1467 #endif /* MACH_BSD */
1468 
1469 	/* kern_task not created by this function has unique id 0, start with 1 here. */
1470 	task_set_uniqueid(new_task);
1471 
1472 #if CONFIG_MACF
1473 	set_task_crash_label(new_task, NULL);
1474 
1475 	task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1476 	task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1477 #endif
1478 
1479 #if CONFIG_MEMORYSTATUS
1480 	if (max_task_footprint != 0) {
1481 		ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1482 	}
1483 #endif /* CONFIG_MEMORYSTATUS */
1484 
1485 	if (task_wakeups_monitor_rate != 0) {
1486 		uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1487 		int32_t  rate;        // Ignored because of WAKEMON_SET_DEFAULTS
1488 		task_wakeups_monitor_ctl(new_task, &flags, &rate);
1489 	}
1490 
1491 #if CONFIG_IO_ACCOUNTING
1492 	uint32_t flags = IOMON_ENABLE;
1493 	task_io_monitor_ctl(new_task, &flags);
1494 #endif /* CONFIG_IO_ACCOUNTING */
1495 
1496 	machine_task_init(new_task, parent_task, inherit_memory);
1497 
1498 	new_task->task_debug = NULL;
1499 
1500 #if DEVELOPMENT || DEBUG
1501 	new_task->task_unnested = FALSE;
1502 	new_task->task_disconnected_count = 0;
1503 #endif
1504 	queue_init(&new_task->semaphore_list);
1505 	new_task->semaphores_owned = 0;
1506 
1507 	new_task->vtimers = 0;
1508 
1509 	new_task->shared_region = NULL;
1510 
1511 	new_task->affinity_space = NULL;
1512 
1513 	new_task->t_kpc = 0;
1514 
1515 	new_task->pidsuspended = FALSE;
1516 	new_task->frozen = FALSE;
1517 	new_task->changing_freeze_state = FALSE;
1518 	new_task->rusage_cpu_flags = 0;
1519 	new_task->rusage_cpu_percentage = 0;
1520 	new_task->rusage_cpu_interval = 0;
1521 	new_task->rusage_cpu_deadline = 0;
1522 	new_task->rusage_cpu_callt = NULL;
1523 #if MACH_ASSERT
1524 	new_task->suspends_outstanding = 0;
1525 #endif
1526 	recount_task_init(&new_task->tk_recount);
1527 
1528 #if HYPERVISOR
1529 	new_task->hv_task_target = NULL;
1530 #endif /* HYPERVISOR */
1531 
1532 #if CONFIG_TASKWATCH
1533 	queue_init(&new_task->task_watchers);
1534 	new_task->num_taskwatchers  = 0;
1535 	new_task->watchapplying  = 0;
1536 #endif /* CONFIG_TASKWATCH */
1537 
1538 	new_task->mem_notify_reserved = 0;
1539 	new_task->memlimit_attrs_reserved = 0;
1540 
1541 	new_task->requested_policy = default_task_requested_policy;
1542 	new_task->effective_policy = default_task_effective_policy;
1543 
1544 	new_task->task_shared_region_slide = -1;
1545 
1546 	if (parent_task != NULL) {
1547 		task_ro_data.task_tokens.sec_token = *task_get_sec_token(parent_task);
1548 		task_ro_data.task_tokens.audit_token = *task_get_audit_token(parent_task);
1549 
1550 		/* only inherit the option bits, no effect until task_set_immovable_pinned() */
1551 		task_ro_data.task_control_port_options = task_get_control_port_options(parent_task);
1552 
1553 		task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_FILTER_MSG;
1554 #if CONFIG_MACF
1555 		if (!(t_flags & TF_CORPSE_FORK)) {
1556 			task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(parent_task);
1557 			task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(parent_task);
1558 		}
1559 #endif
1560 	} else {
1561 		task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1562 		task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1563 
1564 		task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1565 	}
1566 
1567 	/* must set before task_importance_init_from_parent: */
1568 	if (proc_ro != NULL) {
1569 		new_task->bsd_info_ro = proc_ro_ref_task(proc_ro, new_task, &task_ro_data);
1570 	} else {
1571 		new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, new_task, &task_ro_data);
1572 	}
1573 
1574 	ipc_task_init(new_task, parent_task);
1575 
1576 	task_importance_init_from_parent(new_task, parent_task);
1577 
1578 	new_task->corpse_vmobject_list = NULL;
1579 
1580 	if (parent_task != TASK_NULL) {
1581 		/* inherit the parent's shared region */
1582 		shared_region = vm_shared_region_get(parent_task);
1583 		if (shared_region != NULL) {
1584 			vm_shared_region_set(new_task, shared_region);
1585 		}
1586 
1587 #if __has_feature(ptrauth_calls)
1588 		/* use parent's shared_region_id */
1589 		char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1590 		if (shared_region_id != NULL) {
1591 			shared_region_key_alloc(shared_region_id, FALSE, 0);         /* get a reference */
1592 		}
1593 		task_set_shared_region_id(new_task, shared_region_id);
1594 #endif /* __has_feature(ptrauth_calls) */
1595 
1596 		if (task_has_64Bit_addr(parent_task)) {
1597 			task_set_64Bit_addr(new_task);
1598 		}
1599 
1600 		if (task_has_64Bit_data(parent_task)) {
1601 			task_set_64Bit_data(new_task);
1602 		}
1603 
1604 		new_task->all_image_info_addr = parent_task->all_image_info_addr;
1605 		new_task->all_image_info_size = parent_task->all_image_info_size;
1606 		new_task->mach_header_vm_address = 0;
1607 
1608 		if (inherit_memory && parent_task->affinity_space) {
1609 			task_affinity_create(parent_task, new_task);
1610 		}
1611 
1612 		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1613 
1614 		new_task->task_exc_guard = parent_task->task_exc_guard;
1615 		if (parent_task->t_flags & TF_NO_SMT) {
1616 			new_task->t_flags |= TF_NO_SMT;
1617 		}
1618 
1619 		if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1620 			new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1621 		}
1622 
1623 		if (parent_task->t_flags & TF_TECS) {
1624 			new_task->t_flags |= TF_TECS;
1625 		}
1626 
1627 #if defined(__x86_64__)
1628 		if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1629 			new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1630 		}
1631 #endif
1632 		new_task->priority = BASEPRI_DEFAULT;
1633 		new_task->max_priority = MAXPRI_USER;
1634 
1635 		task_policy_create(new_task, parent_task);
1636 	} else {
1637 #ifdef __LP64__
1638 		if (is_64bit) {
1639 			task_set_64Bit_addr(new_task);
1640 		}
1641 #endif
1642 
1643 		if (is_64bit_data) {
1644 			task_set_64Bit_data(new_task);
1645 		}
1646 
1647 		new_task->all_image_info_addr = (mach_vm_address_t)0;
1648 		new_task->all_image_info_size = (mach_vm_size_t)0;
1649 
1650 		new_task->pset_hint = PROCESSOR_SET_NULL;
1651 
1652 		new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1653 
1654 		if (new_task == kernel_task) {
1655 			new_task->priority = BASEPRI_KERNEL;
1656 			new_task->max_priority = MAXPRI_KERNEL;
1657 		} else {
1658 			new_task->priority = BASEPRI_DEFAULT;
1659 			new_task->max_priority = MAXPRI_USER;
1660 		}
1661 	}
1662 
1663 	bzero(new_task->coalition, sizeof(new_task->coalition));
1664 	for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1665 		queue_chain_init(new_task->task_coalition[i]);
1666 	}
1667 
1668 	/* Allocate I/O Statistics */
1669 	new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1670 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1671 
1672 	bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1673 	bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1674 
1675 	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1676 
1677 	counter_alloc(&(new_task->pageins));
1678 	counter_alloc(&(new_task->cow_faults));
1679 	counter_alloc(&(new_task->messages_sent));
1680 	counter_alloc(&(new_task->messages_received));
1681 
	/* Copy resource acc. info from Parent for Corpse Forked task. */
1683 	if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1684 		task_rollup_accounting_info(new_task, parent_task);
1685 		task_store_owned_vmobject_info(new_task, parent_task);
1686 	} else {
1687 		/* Initialize to zero for standard fork/spawn case */
1688 		new_task->total_runnable_time = 0;
1689 		new_task->syscalls_mach = 0;
1690 		new_task->syscalls_unix = 0;
1691 		new_task->c_switch = 0;
1692 		new_task->p_switch = 0;
1693 		new_task->ps_switch = 0;
1694 		new_task->decompressions = 0;
1695 		new_task->low_mem_notified_warn = 0;
1696 		new_task->low_mem_notified_critical = 0;
1697 		new_task->purged_memory_warn = 0;
1698 		new_task->purged_memory_critical = 0;
1699 		new_task->low_mem_privileged_listener = 0;
1700 		new_task->memlimit_is_active = 0;
1701 		new_task->memlimit_is_fatal = 0;
1702 		new_task->memlimit_active_exc_resource = 0;
1703 		new_task->memlimit_inactive_exc_resource = 0;
1704 		new_task->task_timer_wakeups_bin_1 = 0;
1705 		new_task->task_timer_wakeups_bin_2 = 0;
1706 		new_task->task_gpu_ns = 0;
1707 		new_task->task_writes_counters_internal.task_immediate_writes = 0;
1708 		new_task->task_writes_counters_internal.task_deferred_writes = 0;
1709 		new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1710 		new_task->task_writes_counters_internal.task_metadata_writes = 0;
1711 		new_task->task_writes_counters_external.task_immediate_writes = 0;
1712 		new_task->task_writes_counters_external.task_deferred_writes = 0;
1713 		new_task->task_writes_counters_external.task_invalidated_writes = 0;
1714 		new_task->task_writes_counters_external.task_metadata_writes = 0;
1715 #if CONFIG_PHYS_WRITE_ACCT
1716 		new_task->task_fs_metadata_writes = 0;
1717 #endif /* CONFIG_PHYS_WRITE_ACCT */
1718 	}
1719 
1720 
1721 	new_task->donates_own_pages = FALSE;
1722 #if CONFIG_COALITIONS
1723 	if (!(t_flags & TF_CORPSE_FORK)) {
1724 		/* TODO: there is no graceful failure path here... */
1725 		if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1726 			coalitions_adopt_task(parent_coalitions, new_task);
1727 			if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1728 				new_task->donates_own_pages = coalition_is_swappable(parent_coalitions[COALITION_TYPE_JETSAM]);
1729 			}
1730 		} else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1731 			/*
1732 			 * all tasks at least have a resource coalition, so
1733 			 * if the parent has one then inherit all coalitions
1734 			 * the parent is a part of
1735 			 */
1736 			coalitions_adopt_task(parent_task->coalition, new_task);
1737 			if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1738 				new_task->donates_own_pages = coalition_is_swappable(parent_task->coalition[COALITION_TYPE_JETSAM]);
1739 			}
1740 		} else {
1741 			/* TODO: assert that new_task will be PID 1 (launchd) */
1742 			coalitions_adopt_init_task(new_task);
1743 		}
1744 		/*
1745 		 * on exec, we need to transfer the coalition roles from the
1746 		 * parent task to the exec copy task.
1747 		 */
1748 		if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1749 			int coal_roles[COALITION_NUM_TYPES];
1750 			task_coalition_roles(parent_task, coal_roles);
1751 			(void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1752 		}
1753 	} else {
1754 		coalitions_adopt_corpse_task(new_task);
1755 	}
1756 
1757 	if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1758 		panic("created task is not a member of a resource coalition");
1759 	}
1760 	task_set_coalition_member(new_task);
1761 #endif /* CONFIG_COALITIONS */
1762 
1763 	new_task->dispatchqueue_offset = 0;
1764 	if (parent_task != NULL) {
1765 		new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1766 	}
1767 
1768 	new_task->task_can_transfer_memory_ownership = FALSE;
1769 	new_task->task_volatile_objects = 0;
1770 	new_task->task_nonvolatile_objects = 0;
1771 	new_task->task_objects_disowning = FALSE;
1772 	new_task->task_objects_disowned = FALSE;
1773 	new_task->task_owned_objects = 0;
1774 	queue_init(&new_task->task_objq);
1775 
1776 #if CONFIG_FREEZE
1777 	queue_init(&new_task->task_frozen_cseg_q);
1778 #endif /* CONFIG_FREEZE */
1779 
1780 	task_objq_lock_init(new_task);
1781 
1782 #if __arm64__
1783 	new_task->task_legacy_footprint = FALSE;
1784 	new_task->task_extra_footprint_limit = FALSE;
1785 	new_task->task_ios13extended_footprint_limit = FALSE;
1786 #endif /* __arm64__ */
1787 	new_task->task_region_footprint = FALSE;
1788 	new_task->task_has_crossed_thread_limit = FALSE;
1789 	new_task->task_thread_limit = 0;
1790 #if CONFIG_SECLUDED_MEMORY
1791 	new_task->task_can_use_secluded_mem = FALSE;
1792 	new_task->task_could_use_secluded_mem = FALSE;
1793 	new_task->task_could_also_use_secluded_mem = FALSE;
1794 	new_task->task_suppressed_secluded = FALSE;
1795 #endif /* CONFIG_SECLUDED_MEMORY */
1796 
1797 	/*
1798 	 * t_flags is set up above. But since we don't
1799 	 * support darkwake mode being set that way
1800 	 * currently, we clear it out here explicitly.
1801 	 */
1802 	new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1803 
1804 	queue_init(&new_task->io_user_clients);
1805 	new_task->loadTag = 0;
1806 
1807 	lck_mtx_lock(&tasks_threads_lock);
1808 	queue_enter(&tasks, new_task, task_t, tasks);
1809 	tasks_count++;
1810 	if (tasks_suspend_state) {
1811 		task_suspend_internal(new_task);
1812 	}
1813 	lck_mtx_unlock(&tasks_threads_lock);
1814 	task_ref_hold_proc_task_struct(new_task);
1815 
1816 	return KERN_SUCCESS;
1817 }
1818 
1819 /*
1820  *	task_rollup_accounting_info
1821  *
1822  *	Roll up accounting stats. Used to rollup stats
1823  *	for exec copy task and corpse fork.
1824  */
void
task_rollup_accounting_info(task_t to_task, task_t from_task)
{
	assert(from_task != to_task);

	/* CPU-time accounting state is copied wholesale into the destination. */
	recount_task_copy(&to_task->tk_recount, &from_task->tk_recount);
	to_task->total_runnable_time = from_task->total_runnable_time;
	/*
	 * Per-task event counters are accumulated with counter_add() rather
	 * than assigned, so anything already counted on to_task is preserved.
	 */
	counter_add(&to_task->faults, counter_load(&from_task->faults));
	counter_add(&to_task->pageins, counter_load(&from_task->pageins));
	counter_add(&to_task->cow_faults, counter_load(&from_task->cow_faults));
	counter_add(&to_task->messages_sent, counter_load(&from_task->messages_sent));
	counter_add(&to_task->messages_received, counter_load(&from_task->messages_received));
	/* Plain scalar statistics are overwritten with the source task's values. */
	to_task->decompressions = from_task->decompressions;
	to_task->syscalls_mach = from_task->syscalls_mach;
	to_task->syscalls_unix = from_task->syscalls_unix;
	to_task->c_switch = from_task->c_switch;
	to_task->p_switch = from_task->p_switch;
	to_task->ps_switch = from_task->ps_switch;
	to_task->extmod_statistics = from_task->extmod_statistics;
	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
	to_task->purged_memory_warn = from_task->purged_memory_warn;
	to_task->purged_memory_critical = from_task->purged_memory_critical;
	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
	/* Struct copy of the whole I/O statistics block (both sides preallocated). */
	*to_task->task_io_stats = *from_task->task_io_stats;
	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
	to_task->task_gpu_ns = from_task->task_gpu_ns;
	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
#if CONFIG_PHYS_WRITE_ACCT
	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */

#if CONFIG_MEMORYSTATUS
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.memorystatus_dirty_time);
#endif /* CONFIG_MEMORYSTATUS */

	/* Skip ledger roll up for memory accounting entries */
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
#if CONFIG_SCHED_SFI
	/* Roll up per-SFI-class wait times individually. */
	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
	}
#endif
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
}
1887 
1888 /*
1889  *	task_deallocate_internal:
1890  *
1891  *	Drop a reference on a task.
1892  *	Don't call this directly.
1893  */
extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
void
task_deallocate_internal(
	task_t          task,
	os_ref_count_t  refs)
{
	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;

	if (task == TASK_NULL) {
		return;
	}

#if IMPORTANCE_INHERITANCE
	if (refs == 1) {
		/*
		 * If last ref potentially comes from the task's importance,
		 * disconnect it.  But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
		 */
		if (IIT_NULL != task->task_imp_base) {
			ipc_importance_disconnect_task(task);
		}
		return;
	}
#endif /* IMPORTANCE_INHERITANCE */

	/* Only the final reference (refs == 0) triggers destruction below. */
	if (refs > 0) {
		return;
	}

	/*
	 * The task should be dead at this point. Ensure other resources
	 * like threads, are gone before we trash the world.
	 */
	assert(queue_empty(&task->threads));
	assert(get_bsdtask_info(task) == NULL);
	assert(!is_active(task->itk_space));
	assert(!task->active);
	assert(task->active_thread_count == 0);

	/* Unlink the task from the global terminated-tasks list. */
	lck_mtx_lock(&tasks_threads_lock);
	assert(terminated_tasks_count > 0);
	queue_remove(&terminated_tasks, task, task_t, tasks);
	terminated_tasks_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * remove the reference on bank context
	 */
	task_bank_reset(task);

	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup before ripping apart
	 *	the task.
	 */
	machine_task_terminate(task);

	ipc_task_terminate(task);

	/* let iokit know */
	iokit_task_terminate(task);

	/* Unregister task from userspace coredumps on panic */
	kern_unregister_userspace_coredump(task);

	if (task->affinity_space) {
		task_affinity_deallocate(task);
	}

#if MACH_ASSERT
	/* Sanity: the task's ledger and its pmap's ledger must agree. */
	if (task->ledger != NULL &&
	    task->map != NULL &&
	    task->map->pmap != NULL &&
	    task->map->pmap->ledger != NULL) {
		assert(task->ledger == task->map->pmap->ledger);
	}
#endif /* MACH_ASSERT */

	/* All VM objects owned by the task must be disowned before teardown. */
	vm_owned_objects_disown(task);
	assert(task->task_objects_disowned);
	if (task->task_owned_objects != 0) {
		panic("task_deallocate(%p): "
		    "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
		    task,
		    task->task_volatile_objects,
		    task->task_nonvolatile_objects,
		    task->task_owned_objects);
	}

#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_deallocate(task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	vm_map_deallocate(task->map);
	if (task->is_large_corpse) {
		assert(large_corpse_count > 0);
		OSDecrementAtomic(&large_corpse_count);
		task->is_large_corpse = false;
	}
	is_release(task->itk_space);
	if (task->t_rr_ranges) {
		restartable_ranges_release(task->t_rr_ranges);
	}

	/* Snapshot wakeup totals now; the ledger is dereferenced further down. */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    &interrupt_wakeups, &debit);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    &platform_idle_wakeups, &debit);

#if defined(CONFIG_SCHED_MULTIQ)
	sched_group_destroy(task->sched_group);
#endif

	/* Capture CPU time (and energy) totals before recount state is deinited. */
	struct recount_times_mach sum = { 0 };
	struct recount_times_mach p_only = { 0 };
	recount_task_times_perf_only(task, &sum, &p_only);
#if CONFIG_PERVASIVE_ENERGY
	uint64_t energy = recount_task_energy_nj(task);
#endif /* CONFIG_PERVASIVE_ENERGY */
	recount_task_deinit(&task->tk_recount);

	/* Accumulate statistics for dead tasks */
	lck_spin_lock(&dead_task_statistics_lock);
	dead_task_statistics.total_user_time += sum.rtm_user;
	dead_task_statistics.total_system_time += sum.rtm_system;

	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;

	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
	dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
	dead_task_statistics.total_pset_switches += task->ps_switch;
	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
#if CONFIG_PERVASIVE_ENERGY
	dead_task_statistics.task_energy += energy;
#endif /* CONFIG_PERVASIVE_ENERGY */

	lck_spin_unlock(&dead_task_statistics_lock);
	lck_mtx_destroy(&task->lock, &task_lck_grp);

	/* Fold the task's kernel-memory ledger entries into the global totals. */
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
	}
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
	}
	ledger_dereference(task->ledger);

	counter_free(&task->faults);
	counter_free(&task->pageins);
	counter_free(&task->cow_faults);
	counter_free(&task->messages_sent);
	counter_free(&task->messages_received);

#if CONFIG_COALITIONS
	task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */

	bzero(task->coalition, sizeof(task->coalition));

#if MACH_BSD
	/* clean up collected information since last reference to task is gone */
	if (task->corpse_info) {
		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
		task_crashinfo_destroy(task->corpse_info);
		task->corpse_info = NULL;
		kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
	}
#endif

#if CONFIG_MACF
	if (get_task_crash_label(task)) {
		mac_exc_free_label(get_task_crash_label(task));
		set_task_crash_label(task, NULL);
	}
#endif

	assert(queue_empty(&task->task_objq));
	task_objq_lock_destroy(task);

	if (task->corpse_vmobject_list) {
		kfree_data(task->corpse_vmobject_list,
		    (vm_size_t)task->corpse_vmobject_list_size);
	}

	task_ref_count_fini(task);

	/* proc_ro_release_task returns the proc_ro to free if this was the last user. */
	task->bsd_info_ro = proc_ro_release_task((proc_ro_t)task->bsd_info_ro);

	if (task->bsd_info_ro != NULL) {
		proc_ro_free(task->bsd_info_ro);
		task->bsd_info_ro = NULL;
	}

	task_release_proc_task_struct(task);
}
2103 
2104 /*
2105  *	task_name_deallocate_mig:
2106  *
2107  *	Drop a reference on a task name.
2108  */
2109 void
task_name_deallocate_mig(task_name_t task_name)2110 task_name_deallocate_mig(
2111 	task_name_t             task_name)
2112 {
2113 	return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2114 }
2115 
2116 /*
2117  *	task_policy_set_deallocate_mig:
2118  *
2119  *	Drop a reference on a task type.
2120  */
2121 void
task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)2122 task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2123 {
2124 	return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2125 }
2126 
2127 /*
2128  *	task_policy_get_deallocate_mig:
2129  *
2130  *	Drop a reference on a task type.
2131  */
2132 void
task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)2133 task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2134 {
2135 	return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2136 }
2137 
2138 /*
2139  *	task_inspect_deallocate_mig:
2140  *
2141  *	Drop a task inspection reference.
2142  */
2143 void
task_inspect_deallocate_mig(task_inspect_t task_inspect)2144 task_inspect_deallocate_mig(
2145 	task_inspect_t          task_inspect)
2146 {
2147 	return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2148 }
2149 
2150 /*
2151  *	task_read_deallocate_mig:
2152  *
2153  *	Drop a reference on task read port.
2154  */
2155 void
task_read_deallocate_mig(task_read_t task_read)2156 task_read_deallocate_mig(
2157 	task_read_t          task_read)
2158 {
2159 	return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2160 }
2161 
2162 /*
2163  *	task_suspension_token_deallocate:
2164  *
2165  *	Drop a reference on a task suspension token.
2166  */
2167 void
task_suspension_token_deallocate(task_suspension_token_t token)2168 task_suspension_token_deallocate(
2169 	task_suspension_token_t         token)
2170 {
2171 	return task_deallocate((task_t)token);
2172 }
2173 
2174 void
task_suspension_token_deallocate_grp(task_suspension_token_t token,task_grp_t grp)2175 task_suspension_token_deallocate_grp(
2176 	task_suspension_token_t         token,
2177 	task_grp_t                      grp)
2178 {
2179 	return task_deallocate_grp((task_t)token, grp);
2180 }
2181 
2182 /*
2183  * task_collect_crash_info:
2184  *
2185  * collect crash info from bsd and mach based data
2186  */
2187 kern_return_t
task_collect_crash_info(task_t task,struct label * crash_label,int is_corpse_fork)2188 task_collect_crash_info(
2189 	task_t task,
2190 #ifdef CONFIG_MACF
2191 	struct label *crash_label,
2192 #endif
2193 	int is_corpse_fork)
2194 {
2195 	kern_return_t kr = KERN_SUCCESS;
2196 
2197 	kcdata_descriptor_t crash_data = NULL;
2198 	kcdata_descriptor_t crash_data_release = NULL;
2199 	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2200 	mach_vm_offset_t crash_data_ptr = 0;
2201 	void *crash_data_kernel = NULL;
2202 	void *crash_data_kernel_release = NULL;
2203 #if CONFIG_MACF
2204 	struct label *label, *free_label;
2205 #endif
2206 
2207 	if (!corpses_enabled()) {
2208 		return KERN_NOT_SUPPORTED;
2209 	}
2210 
2211 #if CONFIG_MACF
2212 	free_label = label = mac_exc_create_label(NULL);
2213 #endif
2214 
2215 	task_lock(task);
2216 
2217 	assert(is_corpse_fork || get_bsdtask_info(task) != NULL);
2218 	if (task->corpse_info == NULL && (is_corpse_fork || get_bsdtask_info(task) != NULL)) {
2219 #if CONFIG_MACF
2220 		/* Set the crash label, used by the exception delivery mac hook */
2221 		free_label = get_task_crash_label(task);         // Most likely NULL.
2222 		set_task_crash_label(task, label);
2223 		mac_exc_update_task_crash_label(task, crash_label);
2224 #endif
2225 		task_unlock(task);
2226 
2227 		crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2228 		    Z_WAITOK | Z_ZERO);
2229 		if (crash_data_kernel == NULL) {
2230 			kr = KERN_RESOURCE_SHORTAGE;
2231 			goto out_no_lock;
2232 		}
2233 		crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2234 
2235 		/* Do not get a corpse ref for corpse fork */
2236 		crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2237 		    is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2238 		    KCFLAG_USE_MEMCOPY);
2239 		if (crash_data) {
2240 			task_lock(task);
2241 			crash_data_release = task->corpse_info;
2242 			crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2243 			task->corpse_info = crash_data;
2244 
2245 			task_unlock(task);
2246 			kr = KERN_SUCCESS;
2247 		} else {
2248 			kfree_data(crash_data_kernel,
2249 			    CORPSEINFO_ALLOCATION_SIZE);
2250 			kr = KERN_FAILURE;
2251 		}
2252 
2253 		if (crash_data_release != NULL) {
2254 			task_crashinfo_destroy(crash_data_release);
2255 		}
2256 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2257 	} else {
2258 		task_unlock(task);
2259 	}
2260 
2261 out_no_lock:
2262 #if CONFIG_MACF
2263 	if (free_label != NULL) {
2264 		mac_exc_free_label(free_label);
2265 	}
2266 #endif
2267 	return kr;
2268 }
2269 
2270 /*
2271  * task_deliver_crash_notification:
2272  *
2273  * Makes outcall to registered host port for a corpse.
2274  */
kern_return_t
task_deliver_crash_notification(
	task_t corpse, /* corpse or corpse fork */
	thread_t thread,
	exception_type_t etype,
	mach_exception_subcode_t subcode)
{
	kcdata_descriptor_t crash_info = corpse->corpse_info;
	thread_t th_iter = NULL;
	kern_return_t kr = KERN_SUCCESS;
	wait_interrupt_t wsave;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	ipc_port_t corpse_port;

	/* Nothing to deliver if crash info was never collected. */
	if (crash_info == NULL) {
		return KERN_FAILURE;
	}

	assert(task_is_a_corpse(corpse));

	task_lock(corpse);

	/*
	 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
	 * Crash reporters should derive whether it's fatal from corpse blob.
	 */
	code[0] = etype;
	code[1] = subcode;

	/* Reset IPC state of every non-duplicate thread before exposing the corpse. */
	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
	{
		if (th_iter->corpse_dup == FALSE) {
			ipc_thread_reset(th_iter);
		}
	}
	task_unlock(corpse);

	/* Arm the no-sender notification for taskport */
	task_reference(corpse);
	corpse_port = convert_corpse_to_port_and_nsrequest(corpse);

	/* Deliver the exception uninterruptibly, then restore the old level. */
	wsave = thread_interrupt_level(THREAD_UNINT);
	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
	if (kr != KERN_SUCCESS) {
		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(corpse));
	}

	(void)thread_interrupt_level(wsave);

	/*
	 * Drop the send right on corpse port, will fire the
	 * no-sender notification if exception deliver failed.
	 */
	ipc_port_release_send(corpse_port);
	return kr;
}
2331 
2332 /*
2333  *	task_terminate:
2334  *
2335  *	Terminate the specified task.  See comments on thread_terminate
2336  *	(kern/thread.c) about problems with terminating the "current task."
2337  */
2338 
2339 kern_return_t
task_terminate(task_t task)2340 task_terminate(
2341 	task_t          task)
2342 {
2343 	if (task == TASK_NULL) {
2344 		return KERN_INVALID_ARGUMENT;
2345 	}
2346 
2347 	if (get_bsdtask_info(task)) {
2348 		return KERN_FAILURE;
2349 	}
2350 
2351 	return task_terminate_internal(task);
2352 }
2353 
2354 #if MACH_ASSERT
2355 extern int proc_pid(struct proc *);
2356 extern void proc_name_kdp(struct proc *p, char *buf, int size);
2357 #endif /* MACH_ASSERT */
2358 
2359 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
2360 static void
task_partial_reap(task_t task,__unused int pid)2361 __unused task_partial_reap(task_t task, __unused int pid)
2362 {
2363 	unsigned int    reclaimed_resident = 0;
2364 	unsigned int    reclaimed_compressed = 0;
2365 	uint64_t        task_page_count;
2366 
2367 	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2368 
2369 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2370 	    pid, task_page_count, 0, 0, 0);
2371 
2372 	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2373 
2374 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2375 	    pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2376 }
2377 
2378 /*
2379  * task_mark_corpse:
2380  *
2381  * Mark the task as a corpse. Called by crashing thread.
2382  */
2383 kern_return_t
task_mark_corpse(task_t task)2384 task_mark_corpse(task_t task)
2385 {
2386 	kern_return_t kr = KERN_SUCCESS;
2387 	thread_t self_thread;
2388 	(void) self_thread;
2389 	wait_interrupt_t wsave;
2390 #if CONFIG_MACF
2391 	struct label *crash_label = NULL;
2392 #endif
2393 
2394 	assert(task != kernel_task);
2395 	assert(task == current_task());
2396 	assert(!task_is_a_corpse(task));
2397 
2398 #if CONFIG_MACF
2399 	crash_label = mac_exc_create_label_for_proc((struct proc*)get_bsdtask_info(task));
2400 #endif
2401 
2402 	kr = task_collect_crash_info(task,
2403 #if CONFIG_MACF
2404 	    crash_label,
2405 #endif
2406 	    FALSE);
2407 	if (kr != KERN_SUCCESS) {
2408 		goto out;
2409 	}
2410 
2411 	self_thread = current_thread();
2412 
2413 	wsave = thread_interrupt_level(THREAD_UNINT);
2414 	task_lock(task);
2415 
2416 	/*
2417 	 * Check if any other thread called task_terminate_internal
2418 	 * and made the task inactive before we could mark it for
2419 	 * corpse pending report. Bail out if the task is inactive.
2420 	 */
2421 	if (!task->active) {
2422 		kcdata_descriptor_t crash_data_release = task->corpse_info;;
2423 		void *crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);;
2424 
2425 		task->corpse_info = NULL;
2426 		task_unlock(task);
2427 
2428 		if (crash_data_release != NULL) {
2429 			task_crashinfo_destroy(crash_data_release);
2430 		}
2431 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2432 		return KERN_TERMINATED;
2433 	}
2434 
2435 	task_set_corpse_pending_report(task);
2436 	task_set_corpse(task);
2437 	task->crashed_thread_id = thread_tid(self_thread);
2438 
2439 	kr = task_start_halt_locked(task, TRUE);
2440 	assert(kr == KERN_SUCCESS);
2441 
2442 	task_set_uniqueid(task);
2443 
2444 	task_unlock(task);
2445 
2446 	/*
2447 	 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2448 	 * disable old ports here instead.
2449 	 *
2450 	 * The vm_map and ipc_space must exist until this function returns,
2451 	 * convert_port_to_{map,space}_with_flavor relies on this behavior.
2452 	 */
2453 	ipc_task_disable(task);
2454 
2455 	/* terminate the ipc space */
2456 	ipc_space_terminate(task->itk_space);
2457 
2458 	/* Add it to global corpse task list */
2459 	task_add_to_corpse_task_list(task);
2460 
2461 	thread_terminate_internal(self_thread);
2462 
2463 	(void) thread_interrupt_level(wsave);
2464 	assert(task->halting == TRUE);
2465 
2466 out:
2467 #if CONFIG_MACF
2468 	mac_exc_free_label(crash_label);
2469 #endif
2470 	return kr;
2471 }
2472 
2473 /*
2474  *	task_set_uniqueid
2475  *
2476  *	Set task uniqueid to systemwide unique 64 bit value
2477  */
2478 void
task_set_uniqueid(task_t task)2479 task_set_uniqueid(task_t task)
2480 {
2481 	task->task_uniqueid = OSIncrementAtomic64(&next_taskuniqueid);
2482 }
2483 
2484 /*
2485  *	task_clear_corpse
2486  *
2487  *	Clears the corpse pending bit on task.
2488  *	Removes inspection bit on the threads.
2489  */
2490 void
task_clear_corpse(task_t task)2491 task_clear_corpse(task_t task)
2492 {
2493 	thread_t th_iter = NULL;
2494 
2495 	task_lock(task);
2496 	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2497 	{
2498 		thread_mtx_lock(th_iter);
2499 		th_iter->inspection = FALSE;
2500 		ipc_thread_disable(th_iter);
2501 		thread_mtx_unlock(th_iter);
2502 	}
2503 
2504 	thread_terminate_crashed_threads();
2505 	/* remove the pending corpse report flag */
2506 	task_clear_corpse_pending_report(task);
2507 
2508 	task_unlock(task);
2509 }
2510 
/*
 *	task_port_no_senders
 *
 *	Called whenever the Mach port system detects no-senders on
 *	the task port of a corpse.
 *	Each notification that comes in should terminate the task (corpse).
 */
static void
task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	/*
	 * Resolve the kobject port back to its task.
	 * NOTE(review): ipc_kobject_get_locked() presumably expects the
	 * port lock to be held by the no-senders path -- confirm at caller.
	 */
	task_t task = ipc_kobject_get_locked(port, IKOT_TASK_CONTROL);

	assert(task != TASK_NULL);
	assert(task_is_a_corpse(task));

	/* Remove the task from global corpse task list */
	task_remove_from_corpse_task_list(task);

	/* Undo the corpse marking, then tear the task down for real. */
	task_clear_corpse(task);
	task_terminate_internal(task);
}
2532 
/*
 *	task_port_with_flavor_no_senders
 *
 *	Called whenever the Mach port system detects no-senders on
 *	the task inspect or read port. These ports are allocated lazily and
 *	should be deallocated here when there are no senders remaining.
 */
static void
task_port_with_flavor_no_senders(
	ipc_port_t          port,
	mach_port_mscount_t mscount __unused)
{
	task_t task;
	mach_task_flavor_t flavor;
	ipc_kobject_type_t kotype;

	ip_mq_lock(port);
	/* Bail if new send rights appeared since the notification fired. */
	if (port->ip_srights > 0) {
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
	task = ipc_kobject_get_locked(port, kotype);
	if (task != TASK_NULL) {
		/* Keep the task alive across the unlocked window below. */
		task_reference(task);
	}
	ip_mq_unlock(port);

	if (task == TASK_NULL) {
		/* The task is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	/* Map the kobject type back to the task port flavor it represents. */
	if (kotype == IKOT_TASK_READ) {
		flavor = TASK_FLAVOR_READ;
	} else {
		flavor = TASK_FLAVOR_INSPECT;
	}

	/* Re-acquire in lock order: task itk lock first, then the port lock. */
	itk_lock(task);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_task_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	if (!ip_active(port) ||
	    task->itk_task_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		itk_unlock(task);
		task_deallocate(task);
		return;
	}

	assert(task->itk_task_ports[flavor] == port);
	task->itk_task_ports[flavor] = IP_NULL;
	itk_unlock(task);

	/* Destroy the lazily-allocated port; this also drops the port lock. */
	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	task_deallocate(task);
}
2606 
2607 /*
2608  *	task_wait_till_threads_terminate_locked
2609  *
2610  *	Wait till all the threads in the task are terminated.
2611  *	Might release the task lock and re-acquire it.
2612  */
2613 void
task_wait_till_threads_terminate_locked(task_t task)2614 task_wait_till_threads_terminate_locked(task_t task)
2615 {
2616 	/* wait for all the threads in the task to terminate */
2617 	while (task->active_thread_count != 0) {
2618 		assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2619 		task_unlock(task);
2620 		thread_block(THREAD_CONTINUE_NULL);
2621 
2622 		task_lock(task);
2623 	}
2624 }
2625 
/*
 *	task_duplicate_map_and_threads
 *
 *	Copy vmmap of source task.
 *	Copy active threads from source task to destination task.
 *	Source task would be suspended during the copy.
 *
 *	task          - source task (suspended for the duration)
 *	p             - source proc, used to harvest kqueue udata pointers
 *	new_task      - destination (corpse) task
 *	thread_ret    - out: corpse thread matching the caller (or first thread)
 *	udata_buffer  - out: kalloc'd array of kevent udata values (caller frees)
 *	size          - out: byte size of udata_buffer
 *	num_udata     - out: number of valid entries in udata_buffer
 *	for_exception - allow proceeding without a vm_map_fork for exception corpses
 */
kern_return_t
task_duplicate_map_and_threads(
	task_t task,
	void *p,
	task_t new_task,
	thread_t *thread_ret,
	uint64_t **udata_buffer,
	int *size,
	int *num_udata,
	bool for_exception)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state, use the internal
	 * variant so that no user-space process can resume
	 * the task from under us
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	bool is_large = false;
	if (memorystatus_allowed_vm_map_fork(task, &is_large)) {
		/* Setup new task's vmmap, switch from parent task's map to it COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		if (new_task->map) {
			new_task->is_large_corpse = is_large;
			vm_map_deallocate(oldmap);

			/* copy ledgers that impact the memory footprint */
			vm_map_copy_footprint_ledgers(task, new_task);

			/* Get all the udata pointers from kqueue */
			est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
			if (est_knotes > 0) {
				/* Pad the estimate: knotes may be added while we copy. */
				buf_size = (est_knotes + 32) * sizeof(uint64_t);
				buffer = kalloc_data(buf_size, Z_WAITOK);
				num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
				if (num_knotes > est_knotes + 32) {
					num_knotes = est_knotes + 32;
				}
			}
		} else {
			/* vm_map_fork failed: undo the large-corpse accounting. */
			if (is_large) {
				assert(large_corpse_count > 0);
				OSDecrementAtomic(&large_corpse_count);
			}
			new_task->map = oldmap;
#if DEVELOPMENT || DEBUG
			memorystatus_abort_vm_map_fork(task);
#endif
			task_resume_internal(task);
			return KERN_NO_SPACE;
		}
	} else if (!for_exception) {
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_NO_SPACE;
	}

	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		/* Nothing to duplicate; release the udata buffer if we made one. */
		kfree_data(buffer, buf_size);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		/* More threads may have appeared since the snapshot count; cap it. */
		if (array_count >= active_thread_count) {
			break;
		}

		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	for (i = 0; i < array_count; i++) {
		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of current thread in corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			/* Mark the copy as a failed dup rather than aborting the corpse. */
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(get_bsdthread_info(new_thread),
		    get_bsdthread_info(thread_array[i]));
		new_thread->thread_tag = thread_array[i]->thread_tag &
		    ~THREAD_TAG_USER_JOIN;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	/* Drop the references taken while snapshotting the source threads. */
	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree_type(thread_t, active_thread_count, thread_array);

	if (kr == KERN_SUCCESS) {
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		/* On failure the caller gets nothing back; release everything. */
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		kfree_data(buffer, buf_size);
	}

	return kr;
}
2819 
2820 #if CONFIG_SECLUDED_MEMORY
2821 extern void task_set_can_use_secluded_mem_locked(
2822 	task_t          task,
2823 	boolean_t       can_use_secluded_mem);
2824 #endif /* CONFIG_SECLUDED_MEMORY */
2825 
2826 #if MACH_ASSERT
2827 int debug4k_panic_on_terminate = 0;
2828 #endif /* MACH_ASSERT */
/*
 *	task_terminate_internal:
 *
 *	Terminate the specified task: mark it inactive, terminate all of
 *	its threads, and tear down its synchronizers, IPC space and
 *	address space.  Returns KERN_FAILURE if the task is already
 *	terminating or queued for corpse reporting, KERN_SUCCESS otherwise.
 */
kern_return_t
task_terminate_internal(
	task_t                  task)
{
	thread_t                        thread, self;
	task_t                          self_task;
	boolean_t                       interrupt_save;
	int                             pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = current_task();

	/*
	 *	Get the task locked and make sure that we are not racing
	 *	with someone else trying to terminate us.
	 *	When two tasks are involved, lock them in address order
	 *	to avoid lock-ordering deadlocks.
	 */
	if (task == self_task) {
		task_lock(task);
	} else if (task < self_task) {
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	/* Give up any claim this task has on secluded memory. */
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 *	Task is already being terminated.
		 *	Just return an error. If we are dying, this will
		 *	just get us to our AST special handler and that
		 *	will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (task_corpse_pending_report(task)) {
		/*
		 *	Task is marked for reporting as corpse.
		 *	Just return an error. This will
		 *	just get us to our AST special handler and that
		 *	will get us to finish the path to death
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (self_task != task) {
		task_unlock(self_task);
	}

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 *	Indicate that we want all the threads to stop executing
	 *	at user space by holding the task (we would have held
	 *	each thread independently in thread_terminate_internal -
	 *	but this way we may be more likely to already find it
	 *	held there).  Mark the task inactive, and prevent
	 *	further task operations via the task port.
	 *
	 *	The vm_map and ipc_space must exist until this function returns,
	 *	convert_port_to_{map,space}_with_flavor relies on this behavior.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 *	Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	void *bsd_info = get_bsdtask_info(task);
	if (bsd_info != NULL) {
		pid = proc_pid(bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

// PR-17045188: Revisit implementation
//        task_partial_reap(task, pid);

#if CONFIG_TASKWATCH
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_TASKWATCH */

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Clear the watchport boost on the task.
	 */
	task_remove_turnstile_watchports(task);

	/*
	 *	Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped.  To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explictly here.
	 */

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	void *proc = get_bsdtask_info(task);
	if (proc) {
		pid = proc_pid(proc);
		proc_name_kdp(proc, procname, sizeof(procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof(procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
		DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
		if (debug4k_panic_on_terminate) {
			panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
		}
	}
#endif /* MACH_ASSERT */

	vm_map_terminate(task->map);

	/* release our shared region */
	vm_shared_region_set(task, NULL);

#if __has_feature(ptrauth_calls)
	task_set_shared_region_id(task, NULL);
#endif /* __has_feature(ptrauth_calls) */

	/* Move the task from the live list to the terminated list. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if KPC
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
		kpc_force_all_ctrs(task, 0);
	}
#endif /* KPC */

#if CONFIG_COALITIONS
	/*
	 * Leave the coalition for corpse task or task that
	 * never had any active threads (e.g. fork, exec failure).
	 * For task with active threads, the task will be removed
	 * from coalition by last terminating thread.
	 */
	if (task->active_thread_count == 0) {
		coalitions_remove_task(task);
	}
#endif

#if CONFIG_FREEZE
	extern int      vm_compressor_available;
	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
		task_disown_frozen_csegs(task);
		assert(queue_empty(&task->task_frozen_cseg_q));
	}
#endif /* CONFIG_FREEZE */


	/*
	 * Get rid of the task active reference on itself.
	 */
	task_deallocate_grp(task, TASK_GRP_INTERNAL);

	return KERN_SUCCESS;
}
3081 
3082 void
tasks_system_suspend(boolean_t suspend)3083 tasks_system_suspend(boolean_t suspend)
3084 {
3085 	task_t task;
3086 
3087 	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
3088 	    (suspend ? DBG_FUNC_START : DBG_FUNC_END));
3089 
3090 	lck_mtx_lock(&tasks_threads_lock);
3091 	assert(tasks_suspend_state != suspend);
3092 	tasks_suspend_state = suspend;
3093 	queue_iterate(&tasks, task, task_t, tasks) {
3094 		if (task == kernel_task) {
3095 			continue;
3096 		}
3097 		suspend ? task_suspend_internal(task) : task_resume_internal(task);
3098 	}
3099 	lck_mtx_unlock(&tasks_threads_lock);
3100 }
3101 
3102 /*
3103  * task_start_halt:
3104  *
3105  *      Shut the current task down (except for the current thread) in
3106  *	preparation for dramatic changes to the task (probably exec).
3107  *	We hold the task and mark all other threads in the task for
3108  *	termination.
3109  */
3110 kern_return_t
task_start_halt(task_t task)3111 task_start_halt(task_t task)
3112 {
3113 	kern_return_t kr = KERN_SUCCESS;
3114 	task_lock(task);
3115 	kr = task_start_halt_locked(task, FALSE);
3116 	task_unlock(task);
3117 	return kr;
3118 }
3119 
/*
 *	task_start_halt_locked:
 *
 *	Mark the task halting and terminate every thread except the
 *	caller.  With should_mark_corpse set, threads are additionally
 *	flagged for corpse inspection.  Called with the task locked.
 */
static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	/* Only the caller's own task (or a corpse fork) may be halted here. */
	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!should_mark_corpse &&
	    (task->halting || !task->active || !self->active)) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves. If should_mark_corpse is set, the corpse
		 * creation might have raced with exec, let the corpse
		 * creation continue, once the current thread reaches AST
		 * thread in exec will be woken up from task_complete_halt.
		 * Exec will fail cause the proc was marked for exit.
		 * Once the thread in exec reaches AST, it will call proc_exit
		 * and deliver the EXC_CORPSE_NOTIFY.
		 */
		return KERN_FAILURE;
	}

	/* Thread creation will fail after this point of no return. */
	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);
	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));

	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		/*
		 * Remove priority throttles for threads to terminate timely. This has
		 * to be done after task_hold_locked() traps all threads to AST, but before
		 * threads are marked inactive in thread_terminate_internal(). Takes thread
		 * mutex lock.
		 *
		 * We need task_is_a_corpse() check so that we don't accidently update policy
		 * for tasks that are doing posix_spawn().
		 *
		 * See: thread_policy_update_tasklocked().
		 */
		if (task_is_a_corpse(task)) {
			proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
		}

		if (should_mark_corpse) {
			/* Flag the thread so the corpse copy remains inspectable. */
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		if (thread != self) {
			thread_terminate_internal(thread);
		}
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	/* Drop the hold taken above; the threads are now marked for death. */
	task_release_locked(task);

	return KERN_SUCCESS;
}
3199 
3200 
/*
 * task_complete_halt:
 *
 *	Complete task halt by waiting for threads to terminate, then clean
 *	up task resources (VM, port namespace, etc...) and then let the
 *	current thread go in the (practically empty) task context.
 *
 *	Note: task->halting flag is not cleared in order to avoid creation
 *	of new thread in old exec'ed task.
 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 *	Wait for the other threads to get shut down.
	 *      When the last other thread is reaped, we'll be
	 *	woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		task_unlock(task);
	}

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup of task-level resources
	 *	associated with the current thread before
	 *	ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Terminate the IPC space.  A long time ago,
	 *	this used to be ipc_space_clean() which would
	 *	keep the space active but hollow it.
	 *
	 *	We really do not need this semantics given
	 *	tasks die with exec now.
	 */
	ipc_space_terminate(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_terminate(task->map);

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	iokit_task_terminate(task);
}
3266 
3267 /*
3268  *	task_hold_locked:
3269  *
3270  *	Suspend execution of the specified task.
3271  *	This is a recursive-style suspension of the task, a count of
3272  *	suspends is maintained.
3273  *
3274  *	CONDITIONS: the task is locked and active.
3275  */
3276 void
task_hold_locked(task_t task)3277 task_hold_locked(
3278 	task_t          task)
3279 {
3280 	thread_t        thread;
3281 	void *bsd_info = get_bsdtask_info(task);
3282 
3283 	assert(task->active);
3284 
3285 	if (task->suspend_count++ > 0) {
3286 		return;
3287 	}
3288 
3289 	if (bsd_info) {
3290 		workq_proc_suspended(bsd_info);
3291 	}
3292 
3293 	/*
3294 	 *	Iterate through all the threads and hold them.
3295 	 */
3296 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3297 		thread_mtx_lock(thread);
3298 		thread_hold(thread);
3299 		thread_mtx_unlock(thread);
3300 	}
3301 }
3302 
3303 /*
3304  *	task_hold:
3305  *
3306  *	Same as the internal routine above, except that is must lock
3307  *	and verify that the task is active.  This differs from task_suspend
3308  *	in that it places a kernel hold on the task rather than just a
3309  *	user-level hold.  This keeps users from over resuming and setting
3310  *	it running out from under the kernel.
3311  *
3312  *      CONDITIONS: the caller holds a reference on the task
3313  */
3314 kern_return_t
task_hold(task_t task)3315 task_hold(
3316 	task_t          task)
3317 {
3318 	if (task == TASK_NULL) {
3319 		return KERN_INVALID_ARGUMENT;
3320 	}
3321 
3322 	task_lock(task);
3323 
3324 	if (!task->active) {
3325 		task_unlock(task);
3326 
3327 		return KERN_FAILURE;
3328 	}
3329 
3330 	task_hold_locked(task);
3331 	task_unlock(task);
3332 
3333 	return KERN_SUCCESS;
3334 }
3335 
3336 kern_return_t
task_wait(task_t task,boolean_t until_not_runnable)3337 task_wait(
3338 	task_t          task,
3339 	boolean_t       until_not_runnable)
3340 {
3341 	if (task == TASK_NULL) {
3342 		return KERN_INVALID_ARGUMENT;
3343 	}
3344 
3345 	task_lock(task);
3346 
3347 	if (!task->active) {
3348 		task_unlock(task);
3349 
3350 		return KERN_FAILURE;
3351 	}
3352 
3353 	task_wait_locked(task, until_not_runnable);
3354 	task_unlock(task);
3355 
3356 	return KERN_SUCCESS;
3357 }
3358 
3359 /*
3360  *	task_wait_locked:
3361  *
3362  *	Wait for all threads in task to stop.
3363  *
3364  * Conditions:
3365  *	Called with task locked, active, and held.
3366  */
3367 void
task_wait_locked(task_t task,boolean_t until_not_runnable)3368 task_wait_locked(
3369 	task_t          task,
3370 	boolean_t               until_not_runnable)
3371 {
3372 	thread_t        thread, self;
3373 
3374 	assert(task->active);
3375 	assert(task->suspend_count > 0);
3376 
3377 	self = current_thread();
3378 
3379 	/*
3380 	 *	Iterate through all the threads and wait for them to
3381 	 *	stop.  Do not wait for the current thread if it is within
3382 	 *	the task.
3383 	 */
3384 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3385 		if (thread != self) {
3386 			thread_wait(thread, until_not_runnable);
3387 		}
3388 	}
3389 }
3390 
3391 boolean_t
task_is_app_suspended(task_t task)3392 task_is_app_suspended(task_t task)
3393 {
3394 	return task->pidsuspended;
3395 }
3396 
3397 /*
3398  *	task_release_locked:
3399  *
3400  *	Release a kernel hold on a task.
3401  *
3402  *      CONDITIONS: the task is locked and active
3403  */
3404 void
task_release_locked(task_t task)3405 task_release_locked(
3406 	task_t          task)
3407 {
3408 	thread_t        thread;
3409 	void *bsd_info = get_bsdtask_info(task);
3410 
3411 	assert(task->active);
3412 	assert(task->suspend_count > 0);
3413 
3414 	if (--task->suspend_count > 0) {
3415 		return;
3416 	}
3417 
3418 	if (bsd_info) {
3419 		workq_proc_resumed(bsd_info);
3420 	}
3421 
3422 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3423 		thread_mtx_lock(thread);
3424 		thread_release(thread);
3425 		thread_mtx_unlock(thread);
3426 	}
3427 }
3428 
3429 /*
3430  *	task_release:
3431  *
3432  *	Same as the internal routine above, except that it must lock
3433  *	and verify that the task is active.
3434  *
3435  *      CONDITIONS: The caller holds a reference to the task
3436  */
3437 kern_return_t
task_release(task_t task)3438 task_release(
3439 	task_t          task)
3440 {
3441 	if (task == TASK_NULL) {
3442 		return KERN_INVALID_ARGUMENT;
3443 	}
3444 
3445 	task_lock(task);
3446 
3447 	if (!task->active) {
3448 		task_unlock(task);
3449 
3450 		return KERN_FAILURE;
3451 	}
3452 
3453 	task_release_locked(task);
3454 	task_unlock(task);
3455 
3456 	return KERN_SUCCESS;
3457 }
3458 
/*
 *	task_threads_internal:
 *
 *	Build an array of ports of the requested flavor for every thread
 *	in the task.  On success the caller owns the returned array and
 *	one port right per entry.
 */
static kern_return_t
task_threads_internal(
	task_t                      task,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *countp,
	mach_thread_flavor_t        flavor)
{
	mach_msg_type_number_t  actual, count, count_needed;
	thread_t               *thread_list;
	thread_t                thread;
	unsigned int            i;

	count = 0;
	thread_list = NULL;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(flavor <= THREAD_FLAVOR_INSPECT);

	/*
	 * The thread count can change while the task is unlocked for the
	 * allocation, so re-check under the lock and grow the buffer until
	 * it is large enough.  Exits the loop with the task lock held.
	 */
	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			kfree_type(thread_t, count, thread_list);
			return KERN_FAILURE;
		}

		count_needed = actual = task->thread_count;
		if (count_needed <= count) {
			break;
		}

		/* unlock the task and allocate more memory */
		task_unlock(task);

		kfree_type(thread_t, count, thread_list);
		count = count_needed;
		thread_list = kalloc_type(thread_t, count, Z_WAITOK);

		if (thread_list == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* task lock held: snapshot the thread list, taking a ref on each */
	i = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		assert(i < actual);
		thread_reference(thread);
		thread_list[i++] = thread;
	}

	count_needed = actual;

	/* can unlock task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return null pointer and deallocate memory */

		*threads_out = NULL;
		*countp = 0;
		kfree_type(thread_t, count, thread_list);
	} else {
		/* if we allocated too much, must copy */
		if (count_needed < count) {
			void *newaddr;

			newaddr = kalloc_type(thread_t, count_needed, Z_WAITOK);
			if (newaddr == NULL) {
				/* drop the refs taken above before failing */
				for (i = 0; i < actual; ++i) {
					thread_deallocate(thread_list[i]);
				}
				kfree_type(thread_t, count, thread_list);
				return KERN_RESOURCE_SHORTAGE;
			}

			bcopy(thread_list, newaddr, count_needed * sizeof(thread_t));
			kfree_type(thread_t, count, thread_list);
			thread_list = (thread_t *)newaddr;
		}

		*threads_out = thread_list;
		*countp = actual;

		/*
		 * Do the conversion that Mig should handle: convert each
		 * thread ref in place into a port of the requested flavor
		 * (each convert consumes the thread reference).
		 */

		switch (flavor) {
		case THREAD_FLAVOR_CONTROL:
			if (task == current_task()) {
				/* the caller's own threads get pinned control ports */
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
				}
			} else {
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
				}
			}
			break;
		case THREAD_FLAVOR_READ:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
			}
			break;
		case THREAD_FLAVOR_INSPECT:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
			}
			break;
		}
	}

	return KERN_SUCCESS;
}
3575 
3576 kern_return_t
task_threads(task_t task,thread_act_array_t * threads_out,mach_msg_type_number_t * count)3577 task_threads(
3578 	task_t                      task,
3579 	thread_act_array_t         *threads_out,
3580 	mach_msg_type_number_t     *count)
3581 {
3582 	return task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3583 }
3584 
3585 
/*
 *	task_threads_from_user:
 *
 *	MIG entry for task_threads().  The flavor of the returned thread
 *	ports matches the flavor of the task port the caller presented
 *	(control/read/inspect), so callers cannot escalate privilege.
 */
kern_return_t
task_threads_from_user(
	mach_port_t                 port,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *count)
{
	ipc_kobject_type_t kotype;
	kern_return_t kr;

	/* accepts any task port flavor at least as strong as inspect */
	task_t task = convert_port_to_task_inspect_no_eval(port);

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	kotype = ip_kotype(port);

	switch (kotype) {
	case IKOT_TASK_CONTROL:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
		break;
	case IKOT_TASK_READ:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
		break;
	case IKOT_TASK_INSPECT:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
		break;
	default:
		/* the conversion above should have rejected anything else */
		panic("strange kobject type");
		break;
	}

	/* drop the ref taken by convert_port_to_task_inspect_no_eval() */
	task_deallocate(task);
	return kr;
}
3621 
3622 #define TASK_HOLD_NORMAL        0
3623 #define TASK_HOLD_PIDSUSPEND    1
3624 #define TASK_HOLD_LEGACY        2
3625 #define TASK_HOLD_LEGACY_ALL    3
3626 
/*
 *	place_task_hold:
 *
 *	Apply one user-level suspension (of the given mode) to the task.
 *	Only the zero-to-one transition of the user stop count places the
 *	actual kernel-level hold on the task's threads.
 *
 *	Conditions: the task is locked.
 */
static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	/* thread_id is only sampled when the task has at least one thread */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
	    task_pid(task),
	    task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
	    task->user_stop_count, task->user_stop_count + 1);

#if MACH_ASSERT
	/* debug-only accounting of suspensions issued by the current task */
	current_task()->suspends_outstanding++;
#endif

	if (mode == TASK_HOLD_LEGACY) {
		/* legacy (task_suspend) holds are counted separately */
		task->legacy_stop_count++;
	}

	if (task->user_stop_count++ > 0) {
		/*
		 *	If the stop count was positive, the task is
		 *	already stopped and we can exit.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold).  We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return KERN_SUCCESS;
}
3673 
3674 static kern_return_t
release_task_hold(task_t task,int mode)3675 release_task_hold(
3676 	task_t          task,
3677 	int                     mode)
3678 {
3679 	boolean_t release = FALSE;
3680 
3681 	if (!task->active && !task_is_a_corpse(task)) {
3682 		return KERN_FAILURE;
3683 	}
3684 
3685 	/* Return success for corpse task */
3686 	if (task_is_a_corpse(task)) {
3687 		return KERN_SUCCESS;
3688 	}
3689 
3690 	if (mode == TASK_HOLD_PIDSUSPEND) {
3691 		if (task->pidsuspended == FALSE) {
3692 			return KERN_FAILURE;
3693 		}
3694 		task->pidsuspended = FALSE;
3695 	}
3696 
3697 	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3698 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3699 		    MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3700 		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3701 		    task->user_stop_count, mode, task->legacy_stop_count);
3702 
3703 #if MACH_ASSERT
3704 		/*
3705 		 * This is obviously not robust; if we suspend one task and then resume a different one,
3706 		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
3707 		 * or buggy suspender.
3708 		 */
3709 		current_task()->suspends_outstanding--;
3710 #endif
3711 
3712 		if (mode == TASK_HOLD_LEGACY_ALL) {
3713 			if (task->legacy_stop_count >= task->user_stop_count) {
3714 				task->user_stop_count = 0;
3715 				release = TRUE;
3716 			} else {
3717 				task->user_stop_count -= task->legacy_stop_count;
3718 			}
3719 			task->legacy_stop_count = 0;
3720 		} else {
3721 			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3722 				task->legacy_stop_count--;
3723 			}
3724 			if (--task->user_stop_count == 0) {
3725 				release = TRUE;
3726 			}
3727 		}
3728 	} else {
3729 		return KERN_FAILURE;
3730 	}
3731 
3732 	/*
3733 	 *	Release the task if necessary.
3734 	 */
3735 	if (release) {
3736 		task_release_locked(task);
3737 	}
3738 
3739 	return KERN_SUCCESS;
3740 }
3741 
3742 boolean_t
get_task_suspended(task_t task)3743 get_task_suspended(task_t task)
3744 {
3745 	return 0 != task->user_stop_count;
3746 }
3747 
3748 /*
3749  *	task_suspend:
3750  *
3751  *	Implement an (old-fashioned) user-level suspension on a task.
3752  *
3753  *	Because the user isn't expecting to have to manage a suspension
3754  *	token, we'll track it for him in the kernel in the form of a naked
3755  *	send right to the task's resume port.  All such send rights
3756  *	account for a single suspension against the task (unlike task_suspend2()
3757  *	where each caller gets a unique suspension count represented by a
3758  *	unique send-once right).
3759  *
3760  * Conditions:
3761  *      The caller holds a reference to the task
3762  */
kern_return_t
task_suspend(
	task_t          task)
{
	kern_return_t                   kr;
	mach_port_t                     port;
	mach_port_name_t                name;

	/* the kernel task can never be suspended */
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * place a legacy hold on the task.
	 */
	task_lock(task);
	kr = place_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Claim a send right on the task resume port, and request a no-senders
	 * notification on that port (if none outstanding).  The port is
	 * lazily created on the first legacy suspension.
	 */
	itk_lock(task);
	port = task->itk_resume;
	if (port == IP_NULL) {
		port = ipc_kobject_alloc_port(task, IKOT_TASK_RESUME,
		    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
		task->itk_resume = port;
	} else {
		(void)ipc_kobject_make_send_nsrequest(port, task, IKOT_TASK_RESUME);
	}
	itk_unlock(task);

	/*
	 * Copyout the send right into the calling task's IPC space.  It won't know it is there,
	 * but we'll look it up when calling a traditional resume.  Any IPC operations that
	 * deallocate the send right will auto-release the suspension.
	 */
	if (IP_VALID(port)) {
		kr = ipc_object_copyout(current_space(), ip_to_object(port),
		    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
		    NULL, NULL, &name);
	} else {
		kr = KERN_SUCCESS;
	}
	if (kr != KERN_SUCCESS) {
		/* the hold remains in place even though the token copyout failed */
		printf("warning: %s(%d) failed to copyout suspension "
		    "token for pid %d with error: %d\n",
		    proc_name_address(get_bsdtask_info(current_task())),
		    proc_pid(get_bsdtask_info(current_task())),
		    task_pid(task), kr);
	}

	return kr;
}
3823 
3824 /*
3825  *	task_resume:
3826  *		Release a user hold on a task.
3827  *
3828  * Conditions:
3829  *		The caller holds a reference to the task
3830  */
kern_return_t
task_resume(
	task_t  task)
{
	kern_return_t    kr;
	mach_port_name_t resume_port_name;
	ipc_entry_t              resume_port_entry;
	ipc_space_t              space = current_task()->itk_space;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/* release a legacy task hold */
	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	itk_lock(task); /* for itk_resume */
	is_write_lock(space); /* spin lock */
	if (is_active(space) && IP_VALID(task->itk_resume) &&
	    ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
		/*
		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
		 * we are holding one less legacy hold on the task from this caller.  If the release failed,
		 * go ahead and drop all the rights, as someone either already released our holds or the task
		 * is gone.
		 */
		itk_unlock(task);
		if (kr == KERN_SUCCESS) {
			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
		} else {
			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
		}
		/* space unlocked */
	} else {
		itk_unlock(task);
		is_write_unlock(space);
		if (kr == KERN_SUCCESS) {
			/* resume succeeded without a token from a prior task_suspend() */
			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
			    proc_name_address(get_bsdtask_info(current_task())), proc_pid(get_bsdtask_info(current_task())),
			    task_pid(task));
		}
	}

	return kr;
}
3878 
3879 /*
3880  * Suspend the target task.
3881  * Making/holding a token/reference/port is the callers responsibility.
3882  */
3883 kern_return_t
task_suspend_internal(task_t task)3884 task_suspend_internal(task_t task)
3885 {
3886 	kern_return_t    kr;
3887 
3888 	if (task == TASK_NULL || task == kernel_task) {
3889 		return KERN_INVALID_ARGUMENT;
3890 	}
3891 
3892 	task_lock(task);
3893 	kr = place_task_hold(task, TASK_HOLD_NORMAL);
3894 	task_unlock(task);
3895 	return kr;
3896 }
3897 
3898 /*
3899  * Suspend the target task, and return a suspension token. The token
3900  * represents a reference on the suspended task.
3901  */
3902 static kern_return_t
task_suspend2_grp(task_t task,task_suspension_token_t * suspend_token,task_grp_t grp)3903 task_suspend2_grp(
3904 	task_t                  task,
3905 	task_suspension_token_t *suspend_token,
3906 	task_grp_t              grp)
3907 {
3908 	kern_return_t    kr;
3909 
3910 	kr = task_suspend_internal(task);
3911 	if (kr != KERN_SUCCESS) {
3912 		*suspend_token = TASK_NULL;
3913 		return kr;
3914 	}
3915 
3916 	/*
3917 	 * Take a reference on the target task and return that to the caller
3918 	 * as a "suspension token," which can be converted into an SO right to
3919 	 * the now-suspended task's resume port.
3920 	 */
3921 	task_reference_grp(task, grp);
3922 	*suspend_token = task;
3923 
3924 	return KERN_SUCCESS;
3925 }
3926 
3927 kern_return_t
task_suspend2_mig(task_t task,task_suspension_token_t * suspend_token)3928 task_suspend2_mig(
3929 	task_t                  task,
3930 	task_suspension_token_t *suspend_token)
3931 {
3932 	return task_suspend2_grp(task, suspend_token, TASK_GRP_MIG);
3933 }
3934 
3935 kern_return_t
task_suspend2_external(task_t task,task_suspension_token_t * suspend_token)3936 task_suspend2_external(
3937 	task_t                  task,
3938 	task_suspension_token_t *suspend_token)
3939 {
3940 	return task_suspend2_grp(task, suspend_token, TASK_GRP_EXTERNAL);
3941 }
3942 
3943 /*
3944  * Resume the task
3945  * (reference/token/port management is caller's responsibility).
3946  */
3947 kern_return_t
task_resume_internal(task_suspension_token_t task)3948 task_resume_internal(
3949 	task_suspension_token_t         task)
3950 {
3951 	kern_return_t kr;
3952 
3953 	if (task == TASK_NULL || task == kernel_task) {
3954 		return KERN_INVALID_ARGUMENT;
3955 	}
3956 
3957 	task_lock(task);
3958 	kr = release_task_hold(task, TASK_HOLD_NORMAL);
3959 	task_unlock(task);
3960 	return kr;
3961 }
3962 
3963 /*
3964  * Resume the task using a suspension token. Consumes the token's ref.
3965  */
3966 static kern_return_t
task_resume2_grp(task_suspension_token_t task,task_grp_t grp)3967 task_resume2_grp(
3968 	task_suspension_token_t         task,
3969 	task_grp_t                      grp)
3970 {
3971 	kern_return_t kr;
3972 
3973 	kr = task_resume_internal(task);
3974 	task_suspension_token_deallocate_grp(task, grp);
3975 
3976 	return kr;
3977 }
3978 
3979 kern_return_t
task_resume2_mig(task_suspension_token_t task)3980 task_resume2_mig(
3981 	task_suspension_token_t         task)
3982 {
3983 	return task_resume2_grp(task, TASK_GRP_MIG);
3984 }
3985 
3986 kern_return_t
task_resume2_external(task_suspension_token_t task)3987 task_resume2_external(
3988 	task_suspension_token_t         task)
3989 {
3990 	return task_resume2_grp(task, TASK_GRP_EXTERNAL);
3991 }
3992 
/*
 *	task_suspension_no_senders:
 *
 *	No-senders notification on a task's resume port: every legacy
 *	suspension token has been deallocated, so drop the remaining
 *	legacy holds on the task.
 */
static void
task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	task_t task = convert_port_to_task_suspension_token(port);
	kern_return_t kr;

	if (task == TASK_NULL) {
		return;
	}

	if (task == kernel_task) {
		task_suspension_token_deallocate(task);
		return;
	}

	task_lock(task);

	/*
	 * NOTE(review): KERN_FAILURE from ipc_kobject_nsrequest() is
	 * treated as "the notification is final" (no new send rights
	 * appeared since mscount) — confirm against its definition.
	 */
	kr = ipc_kobject_nsrequest(port, mscount, NULL);
	if (kr == KERN_FAILURE) {
		/* release all the [remaining] outstanding legacy holds */
		release_task_hold(task, TASK_HOLD_LEGACY_ALL);
	}

	task_unlock(task);

	task_suspension_token_deallocate(task);         /* drop token reference */
}
4020 
4021 /*
4022  * Fires when a send once made
4023  * by convert_task_suspension_token_to_port() dies.
4024  */
void
task_suspension_send_once(ipc_port_t port)
{
	task_t task = convert_port_to_task_suspension_token(port);

	if (task == TASK_NULL || task == kernel_task) {
		return;         /* nothing to do */
	}

	/* release the hold held by this specific send-once right */
	task_lock(task);
	release_task_hold(task, TASK_HOLD_NORMAL);
	task_unlock(task);

	task_suspension_token_deallocate(task);         /* drop token reference */
}
4041 
4042 static kern_return_t
task_pidsuspend_locked(task_t task)4043 task_pidsuspend_locked(task_t task)
4044 {
4045 	kern_return_t kr;
4046 
4047 	if (task->pidsuspended) {
4048 		kr = KERN_FAILURE;
4049 		goto out;
4050 	}
4051 
4052 	task->pidsuspended = TRUE;
4053 
4054 	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4055 	if (kr != KERN_SUCCESS) {
4056 		task->pidsuspended = FALSE;
4057 	}
4058 out:
4059 	return kr;
4060 }
4061 
4062 
4063 /*
4064  *	task_pidsuspend:
4065  *
4066  *	Suspends a task by placing a hold on its threads.
4067  *
4068  * Conditions:
4069  *      The caller holds a reference to the task
4070  */
kern_return_t
task_pidsuspend(
	task_t          task)
{
	kern_return_t    kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	kr = task_pidsuspend_locked(task);

	task_unlock(task);

	/*
	 * NOTE(review): message_app_suspended is read after dropping the
	 * task lock — presumably a benign race, mirroring task_pidresume().
	 */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

	return kr;
}
4093 
4094 /*
4095  *	task_pidresume:
4096  *		Resumes a previously suspended task.
4097  *
4098  * Conditions:
4099  *		The caller holds a reference to the task
4100  */
kern_return_t
task_pidresume(
	task_t  task)
{
	kern_return_t    kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

#if CONFIG_FREEZE

	/* serialize against a concurrent freeze/thaw transition */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	task->changing_freeze_state = TRUE;
#endif

	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);

	task_unlock(task);

	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

#if CONFIG_FREEZE

	task_lock(task);

	/* a successful resume also implies the task is no longer frozen */
	if (kr == KERN_SUCCESS) {
		task->frozen = FALSE;
	}
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);
#endif

	return kr;
}
4148 
/* Refcount group used for struct task_watchports reference accounting. */
os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4150 
4151 /*
4152  *	task_add_turnstile_watchports:
4153  *		Setup watchports to boost the main thread of the task.
4154  *
4155  *	Arguments:
4156  *		task: task being spawned
4157  *		thread: main thread of task
4158  *		portwatch_ports: array of watchports
4159  *		portwatch_count: number of watchports
4160  *
4161  *	Conditions:
4162  *		Nothing locked.
4163  */
void
task_add_turnstile_watchports(
	task_t          task,
	thread_t        thread,
	ipc_port_t      *portwatch_ports,
	uint32_t        portwatch_count)
{
	struct task_watchports *watchports = NULL;
	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
	os_ref_count_t refs;

	/* Check if the task has terminated */
	if (!task->active) {
		return;
	}

	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);

	/* allocation takes a task and thread reference */
	watchports = task_watchports_alloc_init(task, thread, portwatch_count);

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Setup watchports to boost the main thread */
	refs = task_add_turnstile_watchports_locked(task,
	    watchports, previous_elem_array, portwatch_ports,
	    portwatch_count);

	/* Drop the space lock */
	is_write_unlock(task->itk_space);

	/* no port ended up holding the struct: free it */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}

	/* Drop the ref on previous_elem_array */
	for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
		task_watchport_elem_deallocate(previous_elem_array[i]);
	}
}
4204 
4205 /*
4206  *	task_remove_turnstile_watchports:
4207  *		Clear all turnstile boost on the task from watchports.
4208  *
4209  *	Arguments:
4210  *		task: task being terminated
4211  *
4212  *	Conditions:
4213  *		Nothing locked.
4214  */
void
task_remove_turnstile_watchports(
	task_t          task)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	struct task_watchports *watchports = NULL;
	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
	uint32_t portwatch_count;

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Check if watchport boost exist */
	if (task->watchports == NULL) {
		is_write_unlock(task->itk_space);
		return;
	}
	watchports = task->watchports;
	portwatch_count = watchports->tw_elem_array_count;

	/* ports to be released are collected in port_freelist */
	refs = task_remove_turnstile_watchports_locked(task, watchports,
	    port_freelist);

	is_write_unlock(task->itk_space);

	/* Drop all the port references */
	for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
		ip_release(port_freelist[i]);
	}

	/* Clear the task and thread references for task_watchport */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4250 
4251 /*
4252  *	task_transfer_turnstile_watchports:
4253  *		Transfer all watchport turnstile boost from old task to new task.
4254  *
4255  *	Arguments:
4256  *		old_task: task calling exec
4257  *		new_task: new exec'ed task
4258  *		thread: main thread of new task
4259  *
4260  *	Conditions:
4261  *		Nothing locked.
4262  */
void
task_transfer_turnstile_watchports(
	task_t   old_task,
	task_t   new_task,
	thread_t new_thread)
{
	struct task_watchports *old_watchports = NULL;
	struct task_watchports *new_watchports = NULL;
	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
	uint32_t portwatch_count;

	/* unlocked early-out; re-checked under the locks below */
	if (old_task->watchports == NULL || !new_task->active) {
		return;
	}

	/* Get the watch port count from the old task */
	is_write_lock(old_task->itk_space);
	if (old_task->watchports == NULL) {
		is_write_unlock(old_task->itk_space);
		return;
	}

	portwatch_count = old_task->watchports->tw_elem_array_count;
	is_write_unlock(old_task->itk_space);

	/* allocate outside the locks; takes task/thread refs */
	new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);

	/* Lock the ipc space for old task */
	is_write_lock(old_task->itk_space);

	/* Lock the ipc space for new task */
	is_write_lock(new_task->itk_space);

	/* Check if watchport boost exist */
	if (old_task->watchports == NULL || !new_task->active) {
		is_write_unlock(new_task->itk_space);
		is_write_unlock(old_task->itk_space);
		(void)task_watchports_release(new_watchports);
		task_watchports_deallocate(new_watchports);
		return;
	}

	old_watchports = old_task->watchports;
	assert(portwatch_count == old_task->watchports->tw_elem_array_count);

	/* Setup new task watchports */
	new_task->watchports = new_watchports;

	for (uint32_t i = 0; i < portwatch_count; i++) {
		ipc_port_t port = old_watchports->tw_elem[i].twe_port;

		if (port == NULL) {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);

		task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);

		/* swap succeeds only if the port still holds the old elem */
		if (ipc_port_replace_watchport_elem_conditional_locked(port,
		    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&old_watchports->tw_elem[i]);

			task_watchports_retain(new_watchports);
			old_refs = task_watchports_release(old_watchports);

			/* Check if all ports are cleaned */
			if (old_refs == 0) {
				old_task->watchports = NULL;
			}
		} else {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
		}
		/* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
	}

	/* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
	new_refs = task_watchports_release(new_watchports);
	if (new_refs == 0) {
		new_task->watchports = NULL;
	}

	is_write_unlock(new_task->itk_space);
	is_write_unlock(old_task->itk_space);

	/* Clear the task and thread references for old_watchport */
	if (old_refs == 0) {
		task_watchports_deallocate(old_watchports);
	}

	/* Clear the task and thread references for new_watchport */
	if (new_refs == 0) {
		task_watchports_deallocate(new_watchports);
	}
}
4361 
4362 /*
4363  *	task_add_turnstile_watchports_locked:
4364  *		Setup watchports to boost the main thread of the task.
4365  *
4366  *	Arguments:
4367  *		task: task to boost
4368  *		watchports: watchport structure to be attached to the task
4369  *		previous_elem_array: an array of old watchport_elem to be returned to caller
4370  *		portwatch_ports: array of watchports
4371  *		portwatch_count: number of watchports
4372  *
4373  *	Conditions:
4374  *		ipc space of the task locked.
4375  *		returns array of old watchport_elem in previous_elem_array
4376  */
static os_ref_count_t
task_add_turnstile_watchports_locked(
	task_t                      task,
	struct task_watchports      *watchports,
	struct task_watchport_elem  **previous_elem_array,
	ipc_port_t                  *portwatch_ports,
	uint32_t                    portwatch_count)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* Check if the task is still active */
	if (!task->active) {
		refs = task_watchports_release(watchports);
		return refs;
	}

	assert(task->watchports == NULL);
	task->watchports = watchports;

	/* i walks the ports; j walks the displaced-elem output array */
	for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
		ipc_port_t port = portwatch_ports[i];

		task_watchport_elem_init(&watchports->tw_elem[i], task, port);
		if (port == NULL) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}

		ip_mq_lock(port);

		/* Check if port is in valid state to be setup as watchport */
		if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
		    &previous_elem_array[j]) != KERN_SUCCESS) {
			/* NOTE(review): the callee presumably unlocks the port on
			 * failure too — confirm against its definition */
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}
		/* port unlocked on return */

		ip_reference(port);
		task_watchports_retain(watchports);
		if (previous_elem_array[j] != NULL) {
			j++;
		}
	}

	/* Drop the reference on task_watchport struct returned by os_ref_init */
	refs = task_watchports_release(watchports);
	if (refs == 0) {
		task->watchports = NULL;
	}

	return refs;
}
4430 
4431 /*
4432  *	task_remove_turnstile_watchports_locked:
4433  *		Clear all turnstile boost on the task from watchports.
4434  *
4435  *	Arguments:
4436  *		task: task to remove watchports from
4437  *		watchports: watchports structure for the task
4438  *		port_freelist: array of ports returned with ref to caller
4439  *
4440  *
4441  *	Conditions:
4442  *		ipc space of the task locked.
4443  *		array of ports with refs are returned in port_freelist
4444  */
static os_ref_count_t
task_remove_turnstile_watchports_locked(
	task_t                 task,
	struct task_watchports *watchports,
	ipc_port_t             *port_freelist)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* i walks the elems; j walks the freelist output array */
	for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
		ipc_port_t port = watchports->tw_elem[i].twe_port;
		if (port == NULL) {
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);
		if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
		    &watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			/* port ref is handed to the caller via the freelist */
			port_freelist[j++] = port;
			refs = task_watchports_release(watchports);

			/* Check if all ports are cleaned */
			if (refs == 0) {
				task->watchports = NULL;
				break;
			}
		}
		/* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
	}
	return refs;
}
4477 
4478 /*
4479  *	task_watchports_alloc_init:
4480  *		Allocate and initialize task watchport struct.
4481  *
4482  *	Conditions:
4483  *		Nothing locked.
4484  */
4485 static struct task_watchports *
task_watchports_alloc_init(task_t task,thread_t thread,uint32_t count)4486 task_watchports_alloc_init(
4487 	task_t        task,
4488 	thread_t      thread,
4489 	uint32_t      count)
4490 {
4491 	struct task_watchports *watchports = kalloc_type(struct task_watchports,
4492 	    struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4493 
4494 	task_reference(task);
4495 	thread_reference(thread);
4496 	watchports->tw_task = task;
4497 	watchports->tw_thread = thread;
4498 	watchports->tw_elem_array_count = count;
4499 	os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4500 
4501 	return watchports;
4502 }
4503 
4504 /*
4505  *	task_watchports_deallocate:
4506  *		Deallocate task watchport struct.
4507  *
4508  *	Conditions:
4509  *		Nothing locked.
4510  */
4511 static void
task_watchports_deallocate(struct task_watchports * watchports)4512 task_watchports_deallocate(
4513 	struct task_watchports *watchports)
4514 {
4515 	uint32_t portwatch_count = watchports->tw_elem_array_count;
4516 
4517 	task_deallocate(watchports->tw_task);
4518 	thread_deallocate(watchports->tw_thread);
4519 	kfree_type(struct task_watchports, struct task_watchport_elem,
4520 	    portwatch_count, watchports);
4521 }
4522 
4523 /*
4524  *	task_watchport_elem_deallocate:
4525  *		Deallocate task watchport element and release its ref on task_watchport.
4526  *
4527  *	Conditions:
4528  *		Nothing locked.
4529  */
void
task_watchport_elem_deallocate(
	struct task_watchport_elem *watchport_elem)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	task_t task = watchport_elem->twe_task;
	struct task_watchports *watchports = NULL;
	ipc_port_t port = NULL;

	assert(task != NULL);

	/* Take the space lock to modify the element */
	is_write_lock(task->itk_space);

	watchports = task->watchports;
	assert(watchports != NULL);

	port = watchport_elem->twe_port;
	assert(port != NULL);

	/* Detach the element, then drop its ref on the watchports struct */
	task_watchport_elem_clear(watchport_elem);
	refs = task_watchports_release(watchports);

	/* Last ref gone: unhook the struct from the task while still locked */
	if (refs == 0) {
		task->watchports = NULL;
	}

	is_write_unlock(task->itk_space);

	/* Port release and struct teardown happen outside the space lock */
	ip_release(port);
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4564 
4565 /*
4566  *	task_has_watchports:
4567  *		Return TRUE if task has watchport boosts.
4568  *
4569  *	Conditions:
4570  *		Nothing locked.
4571  */
4572 boolean_t
task_has_watchports(task_t task)4573 task_has_watchports(task_t task)
4574 {
4575 	return task->watchports != NULL;
4576 }
4577 
4578 #if DEVELOPMENT || DEBUG
4579 
4580 extern void IOSleep(int);
4581 
kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int     n;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * this function is used to strip all of the mappings from
	 * the pmap for the specified task to force the task to
	 * re-fault all of the pages it is actively using... this
	 * allows us to approximate the true working set of the
	 * specified task.  We only engage if at least 1 of the
	 * threads in the task is runnable, but we want to continuously
	 * sweep (at least for a while - I've arbitrarily set the limit at
	 * 100 sweeps to be re-looked at as we gain experience) to get a better
	 * view into what areas within a page are being visited (as opposed to only
	 * seeing the first fault of a page after the task becomes
	 * runnable)...  in the future I may
	 * try to block until awakened by a thread in this task
	 * being made runnable, but for now we'll periodically poll from the
	 * user level debug tool driving the sysctl
	 */
	for (n = 0; n < 100; n++) {
		thread_t        thread;
		boolean_t       runnable;
		boolean_t       do_unnest;
		int             page_count;

		runnable = FALSE;
		do_unnest = FALSE;

		task_lock(task);

		/* Any runnable thread keeps the sweep going */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}
		/* Count each invocation once, on the first sweep */
		if (n == 0) {
			task->task_disconnected_count++;
		}

		/* Pass the unnest flag to the disconnect call at most once per task */
		if (task->task_unnested == FALSE) {
			if (runnable == TRUE) {
				task->task_unnested = TRUE;
				do_unnest = TRUE;
			}
		}
		task_unlock(task);

		/* No runnable threads: stop sweeping */
		if (runnable == FALSE) {
			break;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
		    task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
		    task, page_count, 0, 0, 0);

		/* Briefly sleep every 5th sweep so the loop doesn't monopolize the CPU */
		if ((n % 5) == 4) {
			IOSleep(1);
		}
	}
	return KERN_SUCCESS;
}
4654 
4655 #endif
4656 
4657 
4658 #if CONFIG_FREEZE
4659 
4660 /*
4661  *	task_freeze:
4662  *
4663  *	Freeze a task.
4664  *
4665  * Conditions:
4666  *      The caller holds a reference to the task
4667  */
4668 extern void     vm_wake_compactor_swapper(void);
4669 extern struct freezer_context freezer_context_global;
4670 
kern_return_t
task_freeze(
	task_t    task,
	uint32_t           *purgeable_count,
	uint32_t           *wired_count,
	uint32_t           *clean_count,
	uint32_t           *dirty_count,
	uint32_t           dirty_budget,
	uint32_t           *shared_count,
	int                *freezer_error_code,
	boolean_t          eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/* Wait out any freeze/thaw transition already in progress on this task */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	/* Already frozen: nothing to do */
	if (task->frozen) {
		task_unlock(task);
		return KERN_FAILURE;
	}
	/* Claim the transition; concurrent freeze/thaw callers wait above */
	task->changing_freeze_state = TRUE;

	/*
	 * Publish this task as the one currently being frozen.
	 * NOTE(review): assumes freeze operations are serialized so the
	 * global context belongs to one freeze at a time — confirm with
	 * the freezer's callers.
	 */
	freezer_context_global.freezer_ctx_task = task;

	task_unlock(task);

	/* Perform the actual freeze (or evaluation pass) with the task unlocked */
	kr = vm_map_freeze(task,
	    purgeable_count,
	    wired_count,
	    clean_count,
	    dirty_count,
	    dirty_budget,
	    shared_count,
	    freezer_error_code,
	    eval_only);

	task_lock(task);

	/* Only a successful, non-evaluation pass marks the task frozen */
	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;

		/* This freeze session is done with the global freezer context */
		freezer_context_global.freezer_ctx_task = NULL;
		freezer_context_global.freezer_ctx_uncompressed_pages = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64);         /*used to track pageouts*/
		}

		freezer_context_global.freezer_ctx_swapped_bytes = 0;
	}

	/* End the transition and wake anyone waiting in the loop above */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (kr == KERN_SUCCESS) &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kind of "per-task packed c_segs"
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&vm_swapout_thread);
	}

	return kr;
}
4759 
4760 /*
4761  *	task_thaw:
4762  *
4763  *	Thaw a currently frozen task.
4764  *
4765  * Conditions:
4766  *      The caller holds a reference to the task
4767  */
kern_return_t
task_thaw(
	task_t          task)
{
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/* Wait out any freeze/thaw transition already in progress */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	/* Can't thaw a task that isn't frozen */
	if (!task->frozen) {
		task_unlock(task);
		return KERN_FAILURE;
	}
	task->frozen = FALSE;

	task_unlock(task);

	return KERN_SUCCESS;
}
4795 
4796 void
task_update_frozen_to_swap_acct(task_t task,int64_t amount,freezer_acct_op_t op)4797 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
4798 {
4799 	/*
4800 	 * We don't assert that the task lock is held because we call this
4801 	 * routine from the decompression path and we won't be holding the
4802 	 * task lock. However, since we are in the context of the task we are
4803 	 * safe.
4804 	 * In the case of the task_freeze path, we call it from behind the task
4805 	 * lock but we don't need to because we have a reference on the proc
4806 	 * being frozen.
4807 	 */
4808 
4809 	assert(task);
4810 	if (amount == 0) {
4811 		return;
4812 	}
4813 
4814 	if (op == CREDIT_TO_SWAP) {
4815 		ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4816 	} else if (op == DEBIT_FROM_SWAP) {
4817 		ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4818 	} else {
4819 		panic("task_update_frozen_to_swap_acct: Invalid ledger op");
4820 	}
4821 }
4822 #endif /* CONFIG_FREEZE */
4823 
4824 kern_return_t
task_set_security_tokens(task_t task,security_token_t sec_token,audit_token_t audit_token,host_priv_t host_priv)4825 task_set_security_tokens(
4826 	task_t           task,
4827 	security_token_t sec_token,
4828 	audit_token_t    audit_token,
4829 	host_priv_t      host_priv)
4830 {
4831 	ipc_port_t       host_port = IP_NULL;
4832 	kern_return_t    kr;
4833 
4834 	if (task == TASK_NULL) {
4835 		return KERN_INVALID_ARGUMENT;
4836 	}
4837 
4838 	task_lock(task);
4839 	task_set_tokens(task, &sec_token, &audit_token);
4840 	task_unlock(task);
4841 
4842 	if (host_priv != HOST_PRIV_NULL) {
4843 		kr = host_get_host_priv_port(host_priv, &host_port);
4844 	} else {
4845 		kr = host_get_host_port(host_priv_self(), &host_port);
4846 	}
4847 	assert(kr == KERN_SUCCESS);
4848 
4849 	kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4850 	return kr;
4851 }
4852 
4853 kern_return_t
task_send_trace_memory(__unused task_t target_task,__unused uint32_t pid,__unused uint64_t uniqueid)4854 task_send_trace_memory(
4855 	__unused task_t   target_task,
4856 	__unused uint32_t pid,
4857 	__unused uint64_t uniqueid)
4858 {
4859 	return KERN_INVALID_ARGUMENT;
4860 }
4861 
4862 /*
4863  * This routine was added, pretty much exclusively, for registering the
4864  * RPC glue vector for in-kernel short circuited tasks.  Rather than
4865  * removing it completely, I have only disabled that feature (which was
4866  * the only feature at the time).  It just appears that we are going to
4867  * want to add some user data to tasks in the future (i.e. bsd info,
4868  * task names, etc...), so I left it in the formal task interface.
4869  */
4870 kern_return_t
task_set_info(task_t task,task_flavor_t flavor,__unused task_info_t task_info_in,__unused mach_msg_type_number_t task_info_count)4871 task_set_info(
4872 	task_t          task,
4873 	task_flavor_t   flavor,
4874 	__unused task_info_t    task_info_in,           /* pointer to IN array */
4875 	__unused mach_msg_type_number_t task_info_count)
4876 {
4877 	if (task == TASK_NULL) {
4878 		return KERN_INVALID_ARGUMENT;
4879 	}
4880 	switch (flavor) {
4881 #if CONFIG_ATM
4882 	case TASK_TRACE_MEMORY_INFO:
4883 		return KERN_NOT_SUPPORTED;
4884 #endif // CONFIG_ATM
4885 	default:
4886 		return KERN_INVALID_ARGUMENT;
4887 	}
4888 }
4889 
4890 static void
_task_fill_times(task_t task,time_value_t * user_time,time_value_t * sys_time)4891 _task_fill_times(task_t task, time_value_t *user_time, time_value_t *sys_time)
4892 {
4893 	clock_sec_t sec;
4894 	clock_usec_t usec;
4895 
4896 	struct recount_times_mach times = recount_task_terminated_times(task);
4897 	absolutetime_to_microtime(times.rtm_user, &sec, &usec);
4898 	user_time->seconds = (typeof(user_time->seconds))sec;
4899 	user_time->microseconds = usec;
4900 	absolutetime_to_microtime(times.rtm_system, &sec, &usec);
4901 	sys_time->seconds = (typeof(sys_time->seconds))sec;
4902 	sys_time->microseconds = usec;
4903 }
4904 
/*
 * rdar://20146450 workaround toggle; referenced elsewhere in this file.
 * NOTE(review): non-zero presumably enables the workaround — confirm at
 * the usage site, which is not visible in this chunk.
 */
int radar_20146450 = 1;
4906 kern_return_t
task_info(task_t task,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)4907 task_info(
4908 	task_t                  task,
4909 	task_flavor_t           flavor,
4910 	task_info_t             task_info_out,
4911 	mach_msg_type_number_t  *task_info_count)
4912 {
4913 	kern_return_t error = KERN_SUCCESS;
4914 	mach_msg_type_number_t  original_task_info_count;
4915 	bool is_kernel_task = (task == kernel_task);
4916 
4917 	if (task == TASK_NULL) {
4918 		return KERN_INVALID_ARGUMENT;
4919 	}
4920 
4921 	original_task_info_count = *task_info_count;
4922 	task_lock(task);
4923 
4924 	if (task != current_task() && !task->active) {
4925 		task_unlock(task);
4926 		return KERN_INVALID_ARGUMENT;
4927 	}
4928 
4929 
4930 	switch (flavor) {
4931 	case TASK_BASIC_INFO_32:
4932 	case TASK_BASIC2_INFO_32:
4933 #if defined(__arm64__)
4934 	case TASK_BASIC_INFO_64:
4935 #endif
4936 		{
4937 			task_basic_info_32_t basic_info;
4938 			ledger_amount_t      tmp;
4939 
4940 			if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4941 				error = KERN_INVALID_ARGUMENT;
4942 				break;
4943 			}
4944 
4945 			basic_info = (task_basic_info_32_t)task_info_out;
4946 
4947 			basic_info->virtual_size = (typeof(basic_info->virtual_size))
4948 			    vm_map_adjusted_size(is_kernel_task ? kernel_map : task->map);
4949 			if (flavor == TASK_BASIC2_INFO_32) {
4950 				/*
4951 				 * The "BASIC2" flavor gets the maximum resident
4952 				 * size instead of the current resident size...
4953 				 */
4954 				ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
4955 			} else {
4956 				ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
4957 			}
4958 			basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
4959 
4960 			_task_fill_times(task, &basic_info->user_time,
4961 			    &basic_info->system_time);
4962 
4963 			basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
4964 			basic_info->suspend_count = task->user_stop_count;
4965 
4966 			*task_info_count = TASK_BASIC_INFO_32_COUNT;
4967 			break;
4968 		}
4969 
4970 #if defined(__arm64__)
4971 	case TASK_BASIC_INFO_64_2:
4972 	{
4973 		task_basic_info_64_2_t  basic_info;
4974 
4975 		if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4976 			error = KERN_INVALID_ARGUMENT;
4977 			break;
4978 		}
4979 
4980 		basic_info = (task_basic_info_64_2_t)task_info_out;
4981 
4982 		basic_info->virtual_size  = vm_map_adjusted_size(is_kernel_task ?
4983 		    kernel_map : task->map);
4984 		ledger_get_balance(task->ledger, task_ledgers.phys_mem,
4985 		    (ledger_amount_t *)&basic_info->resident_size);
4986 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
4987 		basic_info->suspend_count = task->user_stop_count;
4988 		_task_fill_times(task, &basic_info->user_time,
4989 		    &basic_info->system_time);
4990 
4991 		*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4992 		break;
4993 	}
4994 
4995 #else /* defined(__arm64__) */
4996 	case TASK_BASIC_INFO_64:
4997 	{
4998 		task_basic_info_64_t basic_info;
4999 
5000 		if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
5001 			error = KERN_INVALID_ARGUMENT;
5002 			break;
5003 		}
5004 
5005 		basic_info = (task_basic_info_64_t)task_info_out;
5006 
5007 		basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5008 		    kernel_map : task->map);
5009 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
5010 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5011 		basic_info->suspend_count = task->user_stop_count;
5012 		_task_fill_times(task, &basic_info->user_time,
5013 		    &basic_info->system_time);
5014 
5015 		*task_info_count = TASK_BASIC_INFO_64_COUNT;
5016 		break;
5017 	}
5018 #endif /* defined(__arm64__) */
5019 
5020 	case MACH_TASK_BASIC_INFO:
5021 	{
5022 		mach_task_basic_info_t  basic_info;
5023 
5024 		if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5025 			error = KERN_INVALID_ARGUMENT;
5026 			break;
5027 		}
5028 
5029 		basic_info = (mach_task_basic_info_t)task_info_out;
5030 
5031 		basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5032 		    kernel_map : task->map);
5033 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
5034 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
5035 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5036 		basic_info->suspend_count = task->user_stop_count;
5037 		_task_fill_times(task, &basic_info->user_time,
5038 		    &basic_info->system_time);
5039 
5040 		*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5041 		break;
5042 	}
5043 
5044 	case TASK_THREAD_TIMES_INFO:
5045 	{
5046 		task_thread_times_info_t times_info;
5047 		thread_t                 thread;
5048 
5049 		if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5050 			error = KERN_INVALID_ARGUMENT;
5051 			break;
5052 		}
5053 
5054 		times_info = (task_thread_times_info_t)task_info_out;
5055 		times_info->user_time = (time_value_t){ 0 };
5056 		times_info->system_time = (time_value_t){ 0 };
5057 
5058 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5059 			if ((thread->options & TH_OPT_IDLE_THREAD) == 0) {
5060 				time_value_t user_time, system_time;
5061 
5062 				thread_read_times(thread, &user_time, &system_time, NULL);
5063 				time_value_add(&times_info->user_time, &user_time);
5064 				time_value_add(&times_info->system_time, &system_time);
5065 			}
5066 		}
5067 
5068 		*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5069 		break;
5070 	}
5071 
5072 	case TASK_ABSOLUTETIME_INFO:
5073 	{
5074 		task_absolutetime_info_t        info;
5075 
5076 		if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5077 			error = KERN_INVALID_ARGUMENT;
5078 			break;
5079 		}
5080 
5081 		info = (task_absolutetime_info_t)task_info_out;
5082 
5083 		struct recount_times_mach term_times =
5084 		    recount_task_terminated_times(task);
5085 		struct recount_times_mach total_times = recount_task_times(task);
5086 
5087 		info->total_user = total_times.rtm_user;
5088 		info->total_system = total_times.rtm_system;
5089 		info->threads_user = total_times.rtm_user - term_times.rtm_user;
5090 		info->threads_system += total_times.rtm_system - term_times.rtm_system;
5091 
5092 		*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5093 		break;
5094 	}
5095 
5096 	case TASK_DYLD_INFO:
5097 	{
5098 		task_dyld_info_t info;
5099 
5100 		/*
5101 		 * We added the format field to TASK_DYLD_INFO output.  For
5102 		 * temporary backward compatibility, accept the fact that
5103 		 * clients may ask for the old version - distinquished by the
5104 		 * size of the expected result structure.
5105 		 */
5106 #define TASK_LEGACY_DYLD_INFO_COUNT \
5107 	        offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5108 
5109 		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5110 			error = KERN_INVALID_ARGUMENT;
5111 			break;
5112 		}
5113 
5114 		info = (task_dyld_info_t)task_info_out;
5115 		info->all_image_info_addr = task->all_image_info_addr;
5116 		info->all_image_info_size = task->all_image_info_size;
5117 
5118 		/* only set format on output for those expecting it */
5119 		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5120 			info->all_image_info_format = task_has_64Bit_addr(task) ?
5121 			    TASK_DYLD_ALL_IMAGE_INFO_64 :
5122 			    TASK_DYLD_ALL_IMAGE_INFO_32;
5123 			*task_info_count = TASK_DYLD_INFO_COUNT;
5124 		} else {
5125 			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5126 		}
5127 		break;
5128 	}
5129 
5130 	case TASK_EXTMOD_INFO:
5131 	{
5132 		task_extmod_info_t info;
5133 		void *p;
5134 
5135 		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5136 			error = KERN_INVALID_ARGUMENT;
5137 			break;
5138 		}
5139 
5140 		info = (task_extmod_info_t)task_info_out;
5141 
5142 		p = get_bsdtask_info(task);
5143 		if (p) {
5144 			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5145 		} else {
5146 			bzero(info->task_uuid, sizeof(info->task_uuid));
5147 		}
5148 		info->extmod_statistics = task->extmod_statistics;
5149 		*task_info_count = TASK_EXTMOD_INFO_COUNT;
5150 
5151 		break;
5152 	}
5153 
5154 	case TASK_KERNELMEMORY_INFO:
5155 	{
5156 		task_kernelmemory_info_t        tkm_info;
5157 		ledger_amount_t                 credit, debit;
5158 
5159 		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5160 			error = KERN_INVALID_ARGUMENT;
5161 			break;
5162 		}
5163 
5164 		tkm_info = (task_kernelmemory_info_t) task_info_out;
5165 		tkm_info->total_palloc = 0;
5166 		tkm_info->total_pfree = 0;
5167 		tkm_info->total_salloc = 0;
5168 		tkm_info->total_sfree = 0;
5169 
5170 		if (task == kernel_task) {
5171 			/*
5172 			 * All shared allocs/frees from other tasks count against
5173 			 * the kernel private memory usage.  If we are looking up
5174 			 * info for the kernel task, gather from everywhere.
5175 			 */
5176 			task_unlock(task);
5177 
5178 			/* start by accounting for all the terminated tasks against the kernel */
5179 			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5180 			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5181 
5182 			/* count all other task/thread shared alloc/free against the kernel */
5183 			lck_mtx_lock(&tasks_threads_lock);
5184 
5185 			/* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5186 			queue_iterate(&tasks, task, task_t, tasks) {
5187 				if (task == kernel_task) {
5188 					if (ledger_get_entries(task->ledger,
5189 					    task_ledgers.tkm_private, &credit,
5190 					    &debit) == KERN_SUCCESS) {
5191 						tkm_info->total_palloc += credit;
5192 						tkm_info->total_pfree += debit;
5193 					}
5194 				}
5195 				if (!ledger_get_entries(task->ledger,
5196 				    task_ledgers.tkm_shared, &credit, &debit)) {
5197 					tkm_info->total_palloc += credit;
5198 					tkm_info->total_pfree += debit;
5199 				}
5200 			}
5201 			lck_mtx_unlock(&tasks_threads_lock);
5202 		} else {
5203 			if (!ledger_get_entries(task->ledger,
5204 			    task_ledgers.tkm_private, &credit, &debit)) {
5205 				tkm_info->total_palloc = credit;
5206 				tkm_info->total_pfree = debit;
5207 			}
5208 			if (!ledger_get_entries(task->ledger,
5209 			    task_ledgers.tkm_shared, &credit, &debit)) {
5210 				tkm_info->total_salloc = credit;
5211 				tkm_info->total_sfree = debit;
5212 			}
5213 			task_unlock(task);
5214 		}
5215 
5216 		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5217 		return KERN_SUCCESS;
5218 	}
5219 
5220 	/* OBSOLETE */
5221 	case TASK_SCHED_FIFO_INFO:
5222 	{
5223 		if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5224 			error = KERN_INVALID_ARGUMENT;
5225 			break;
5226 		}
5227 
5228 		error = KERN_INVALID_POLICY;
5229 		break;
5230 	}
5231 
5232 	/* OBSOLETE */
5233 	case TASK_SCHED_RR_INFO:
5234 	{
5235 		policy_rr_base_t        rr_base;
5236 		uint32_t quantum_time;
5237 		uint64_t quantum_ns;
5238 
5239 		if (*task_info_count < POLICY_RR_BASE_COUNT) {
5240 			error = KERN_INVALID_ARGUMENT;
5241 			break;
5242 		}
5243 
5244 		rr_base = (policy_rr_base_t) task_info_out;
5245 
5246 		if (task != kernel_task) {
5247 			error = KERN_INVALID_POLICY;
5248 			break;
5249 		}
5250 
5251 		rr_base->base_priority = task->priority;
5252 
5253 		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5254 		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5255 
5256 		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5257 
5258 		*task_info_count = POLICY_RR_BASE_COUNT;
5259 		break;
5260 	}
5261 
5262 	/* OBSOLETE */
5263 	case TASK_SCHED_TIMESHARE_INFO:
5264 	{
5265 		policy_timeshare_base_t ts_base;
5266 
5267 		if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5268 			error = KERN_INVALID_ARGUMENT;
5269 			break;
5270 		}
5271 
5272 		ts_base = (policy_timeshare_base_t) task_info_out;
5273 
5274 		if (task == kernel_task) {
5275 			error = KERN_INVALID_POLICY;
5276 			break;
5277 		}
5278 
5279 		ts_base->base_priority = task->priority;
5280 
5281 		*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5282 		break;
5283 	}
5284 
5285 	case TASK_SECURITY_TOKEN:
5286 	{
5287 		security_token_t        *sec_token_p;
5288 
5289 		if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5290 			error = KERN_INVALID_ARGUMENT;
5291 			break;
5292 		}
5293 
5294 		sec_token_p = (security_token_t *) task_info_out;
5295 
5296 		*sec_token_p = *task_get_sec_token(task);
5297 
5298 		*task_info_count = TASK_SECURITY_TOKEN_COUNT;
5299 		break;
5300 	}
5301 
5302 	case TASK_AUDIT_TOKEN:
5303 	{
5304 		audit_token_t   *audit_token_p;
5305 
5306 		if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5307 			error = KERN_INVALID_ARGUMENT;
5308 			break;
5309 		}
5310 
5311 		audit_token_p = (audit_token_t *) task_info_out;
5312 
5313 		*audit_token_p = *task_get_audit_token(task);
5314 
5315 		*task_info_count = TASK_AUDIT_TOKEN_COUNT;
5316 		break;
5317 	}
5318 
5319 	case TASK_SCHED_INFO:
5320 		error = KERN_INVALID_ARGUMENT;
5321 		break;
5322 
5323 	case TASK_EVENTS_INFO:
5324 	{
5325 		task_events_info_t      events_info;
5326 		thread_t                thread;
5327 		uint64_t                n_syscalls_mach, n_syscalls_unix, n_csw;
5328 
5329 		if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5330 			error = KERN_INVALID_ARGUMENT;
5331 			break;
5332 		}
5333 
5334 		events_info = (task_events_info_t) task_info_out;
5335 
5336 
5337 		events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5338 		events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5339 		events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5340 		events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5341 		events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5342 
5343 		n_syscalls_mach = task->syscalls_mach;
5344 		n_syscalls_unix = task->syscalls_unix;
5345 		n_csw = task->c_switch;
5346 
5347 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5348 			n_csw           += thread->c_switch;
5349 			n_syscalls_mach += thread->syscalls_mach;
5350 			n_syscalls_unix += thread->syscalls_unix;
5351 		}
5352 
5353 		events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5354 		events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5355 		events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5356 
5357 		*task_info_count = TASK_EVENTS_INFO_COUNT;
5358 		break;
5359 	}
5360 	case TASK_AFFINITY_TAG_INFO:
5361 	{
5362 		if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5363 			error = KERN_INVALID_ARGUMENT;
5364 			break;
5365 		}
5366 
5367 		error = task_affinity_info(task, task_info_out, task_info_count);
5368 		break;
5369 	}
5370 	case TASK_POWER_INFO:
5371 	{
5372 		if (*task_info_count < TASK_POWER_INFO_COUNT) {
5373 			error = KERN_INVALID_ARGUMENT;
5374 			break;
5375 		}
5376 
5377 		task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5378 		break;
5379 	}
5380 
5381 	case TASK_POWER_INFO_V2:
5382 	{
5383 		if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5384 			error = KERN_INVALID_ARGUMENT;
5385 			break;
5386 		}
5387 		task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5388 		task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5389 		break;
5390 	}
5391 
5392 	case TASK_VM_INFO:
5393 	case TASK_VM_INFO_PURGEABLE:
5394 	{
5395 		task_vm_info_t          vm_info;
5396 		vm_map_t                map;
5397 		ledger_amount_t         tmp_amount;
5398 
5399 		struct proc *p;
5400 		uint32_t platform, sdk;
5401 		p = current_proc();
5402 		platform = proc_platform(p);
5403 		sdk = proc_sdk(p);
5404 		if (original_task_info_count > TASK_VM_INFO_COUNT) {
5405 			/*
5406 			 * Some iOS apps pass an incorrect value for
5407 			 * task_info_count, expressed in number of bytes
5408 			 * instead of number of "natural_t" elements, which
5409 			 * can lead to binary compatibility issues (including
5410 			 * stack corruption) when the data structure is
5411 			 * expanded in the future.
5412 			 * Let's make this potential issue visible by
5413 			 * logging about it...
5414 			 */
5415 			printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5416 			    "task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5417 			    "%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5418 			    __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5419 			    flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5420 			    platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5421 			DTRACE_VM4(suspicious_task_vm_info_count,
5422 			    mach_msg_type_number_t, original_task_info_count,
5423 			    mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5424 			    uint32_t, platform,
5425 			    uint32_t, sdk);
5426 		}
5427 #if __arm64__
5428 		if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5429 		    platform == PLATFORM_IOS &&
5430 		    sdk != 0 &&
5431 		    (sdk >> 16) <= 12) {
5432 			/*
5433 			 * Some iOS apps pass an incorrect value for
5434 			 * task_info_count, expressed in number of bytes
5435 			 * instead of number of "natural_t" elements.
5436 			 * For the sake of backwards binary compatibility
5437 			 * for apps built with an iOS12 or older SDK and using
5438 			 * the "rev2" data structure, let's fix task_info_count
5439 			 * for them, to avoid stomping past the actual end
5440 			 * of their buffer.
5441 			 */
5442 #if DEVELOPMENT || DEBUG
5443 			printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5444 			    "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5445 			    proc_name_address(p), original_task_info_count,
5446 			    TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16),
5447 			    ((sdk >> 8) & 0xff), (sdk & 0xff));
5448 #endif /* DEVELOPMENT || DEBUG */
5449 			DTRACE_VM4(workaround_task_vm_info_count,
5450 			    mach_msg_type_number_t, original_task_info_count,
5451 			    mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5452 			    uint32_t, platform,
5453 			    uint32_t, sdk);
5454 			original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5455 			*task_info_count = original_task_info_count;
5456 		}
5457 		if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5458 		    platform == PLATFORM_IOS &&
5459 		    sdk != 0 &&
5460 		    (sdk >> 16) <= 15) {
5461 			/*
5462 			 * Some iOS apps pass an incorrect value for
5463 			 * task_info_count, expressed in number of bytes
5464 			 * instead of number of "natural_t" elements.
5465 			 */
5466 			printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5467 			    "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5468 			    proc_name_address(p), original_task_info_count,
5469 			    TASK_VM_INFO_REV5_COUNT, platform, (sdk >> 16),
5470 			    ((sdk >> 8) & 0xff), (sdk & 0xff));
5471 			DTRACE_VM4(workaround_task_vm_info_count,
5472 			    mach_msg_type_number_t, original_task_info_count,
5473 			    mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5474 			    uint32_t, platform,
5475 			    uint32_t, sdk);
5476 #if DEVELOPMENT || DEBUG
5477 			/*
5478 			 * For the sake of internal builds livability,
5479 			 * work around this user-space bug by capping the
5480 			 * buffer's size to what it was with the iOS15 SDK.
5481 			 */
5482 			original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5483 			*task_info_count = original_task_info_count;
5484 #endif /* DEVELOPMENT || DEBUG */
5485 		}
5486 #endif /* __arm64__ */
5487 
5488 		if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5489 			error = KERN_INVALID_ARGUMENT;
5490 			break;
5491 		}
5492 
5493 		vm_info = (task_vm_info_t)task_info_out;
5494 
5495 		/*
5496 		 * Do not hold both the task and map locks,
5497 		 * so convert the task lock into a map reference,
5498 		 * drop the task lock, then lock the map.
5499 		 */
5500 		if (is_kernel_task) {
5501 			map = kernel_map;
5502 			task_unlock(task);
5503 			/* no lock, no reference */
5504 		} else {
5505 			map = task->map;
5506 			vm_map_reference(map);
5507 			task_unlock(task);
5508 			vm_map_lock_read(map);
5509 		}
5510 
5511 		vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5512 		vm_info->region_count = map->hdr.nentries;
5513 		vm_info->page_size = vm_map_page_size(map);
5514 
5515 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5516 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5517 
5518 		vm_info->device = 0;
5519 		vm_info->device_peak = 0;
5520 		ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5521 		ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5522 		ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5523 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5524 		ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5525 		ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5526 		ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5527 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5528 		ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5529 
5530 		vm_info->purgeable_volatile_pmap = 0;
5531 		vm_info->purgeable_volatile_resident = 0;
5532 		vm_info->purgeable_volatile_virtual = 0;
5533 		if (is_kernel_task) {
5534 			/*
5535 			 * We do not maintain the detailed stats for the
5536 			 * kernel_pmap, so just count everything as
5537 			 * "internal"...
5538 			 */
5539 			vm_info->internal = vm_info->resident_size;
5540 			/*
5541 			 * ... but since the memory held by the VM compressor
5542 			 * in the kernel address space ought to be attributed
5543 			 * to user-space tasks, we subtract it from "internal"
5544 			 * to give memory reporting tools a more accurate idea
5545 			 * of what the kernel itself is actually using, instead
5546 			 * of making it look like the kernel is leaking memory
5547 			 * when the system is under memory pressure.
5548 			 */
5549 			vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5550 			    PAGE_SIZE);
5551 		} else {
5552 			mach_vm_size_t  volatile_virtual_size;
5553 			mach_vm_size_t  volatile_resident_size;
5554 			mach_vm_size_t  volatile_compressed_size;
5555 			mach_vm_size_t  volatile_pmap_size;
5556 			mach_vm_size_t  volatile_compressed_pmap_size;
5557 			kern_return_t   kr;
5558 
5559 			if (flavor == TASK_VM_INFO_PURGEABLE) {
5560 				kr = vm_map_query_volatile(
5561 					map,
5562 					&volatile_virtual_size,
5563 					&volatile_resident_size,
5564 					&volatile_compressed_size,
5565 					&volatile_pmap_size,
5566 					&volatile_compressed_pmap_size);
5567 				if (kr == KERN_SUCCESS) {
5568 					vm_info->purgeable_volatile_pmap =
5569 					    volatile_pmap_size;
5570 					if (radar_20146450) {
5571 						vm_info->compressed -=
5572 						    volatile_compressed_pmap_size;
5573 					}
5574 					vm_info->purgeable_volatile_resident =
5575 					    volatile_resident_size;
5576 					vm_info->purgeable_volatile_virtual =
5577 					    volatile_virtual_size;
5578 				}
5579 			}
5580 		}
5581 		*task_info_count = TASK_VM_INFO_REV0_COUNT;
5582 
5583 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5584 			/* must be captured while we still have the map lock */
5585 			vm_info->min_address = map->min_offset;
5586 			vm_info->max_address = map->max_offset;
5587 		}
5588 
5589 		/*
5590 		 * Done with vm map things, can drop the map lock and reference,
5591 		 * and take the task lock back.
5592 		 *
5593 		 * Re-validate that the task didn't die on us.
5594 		 */
5595 		if (!is_kernel_task) {
5596 			vm_map_unlock_read(map);
5597 			vm_map_deallocate(map);
5598 		}
5599 		map = VM_MAP_NULL;
5600 
5601 		task_lock(task);
5602 
5603 		if ((task != current_task()) && (!task->active)) {
5604 			error = KERN_INVALID_ARGUMENT;
5605 			break;
5606 		}
5607 
5608 		if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5609 			vm_info->phys_footprint =
5610 			    (mach_vm_size_t) get_task_phys_footprint(task);
5611 			*task_info_count = TASK_VM_INFO_REV1_COUNT;
5612 		}
5613 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5614 			/* data was captured above */
5615 			*task_info_count = TASK_VM_INFO_REV2_COUNT;
5616 		}
5617 
5618 		if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5619 			ledger_get_lifetime_max(task->ledger,
5620 			    task_ledgers.phys_footprint,
5621 			    &vm_info->ledger_phys_footprint_peak);
5622 			ledger_get_balance(task->ledger,
5623 			    task_ledgers.purgeable_nonvolatile,
5624 			    &vm_info->ledger_purgeable_nonvolatile);
5625 			ledger_get_balance(task->ledger,
5626 			    task_ledgers.purgeable_nonvolatile_compressed,
5627 			    &vm_info->ledger_purgeable_novolatile_compressed);
5628 			ledger_get_balance(task->ledger,
5629 			    task_ledgers.purgeable_volatile,
5630 			    &vm_info->ledger_purgeable_volatile);
5631 			ledger_get_balance(task->ledger,
5632 			    task_ledgers.purgeable_volatile_compressed,
5633 			    &vm_info->ledger_purgeable_volatile_compressed);
5634 			ledger_get_balance(task->ledger,
5635 			    task_ledgers.network_nonvolatile,
5636 			    &vm_info->ledger_tag_network_nonvolatile);
5637 			ledger_get_balance(task->ledger,
5638 			    task_ledgers.network_nonvolatile_compressed,
5639 			    &vm_info->ledger_tag_network_nonvolatile_compressed);
5640 			ledger_get_balance(task->ledger,
5641 			    task_ledgers.network_volatile,
5642 			    &vm_info->ledger_tag_network_volatile);
5643 			ledger_get_balance(task->ledger,
5644 			    task_ledgers.network_volatile_compressed,
5645 			    &vm_info->ledger_tag_network_volatile_compressed);
5646 			ledger_get_balance(task->ledger,
5647 			    task_ledgers.media_footprint,
5648 			    &vm_info->ledger_tag_media_footprint);
5649 			ledger_get_balance(task->ledger,
5650 			    task_ledgers.media_footprint_compressed,
5651 			    &vm_info->ledger_tag_media_footprint_compressed);
5652 			ledger_get_balance(task->ledger,
5653 			    task_ledgers.media_nofootprint,
5654 			    &vm_info->ledger_tag_media_nofootprint);
5655 			ledger_get_balance(task->ledger,
5656 			    task_ledgers.media_nofootprint_compressed,
5657 			    &vm_info->ledger_tag_media_nofootprint_compressed);
5658 			ledger_get_balance(task->ledger,
5659 			    task_ledgers.graphics_footprint,
5660 			    &vm_info->ledger_tag_graphics_footprint);
5661 			ledger_get_balance(task->ledger,
5662 			    task_ledgers.graphics_footprint_compressed,
5663 			    &vm_info->ledger_tag_graphics_footprint_compressed);
5664 			ledger_get_balance(task->ledger,
5665 			    task_ledgers.graphics_nofootprint,
5666 			    &vm_info->ledger_tag_graphics_nofootprint);
5667 			ledger_get_balance(task->ledger,
5668 			    task_ledgers.graphics_nofootprint_compressed,
5669 			    &vm_info->ledger_tag_graphics_nofootprint_compressed);
5670 			ledger_get_balance(task->ledger,
5671 			    task_ledgers.neural_footprint,
5672 			    &vm_info->ledger_tag_neural_footprint);
5673 			ledger_get_balance(task->ledger,
5674 			    task_ledgers.neural_footprint_compressed,
5675 			    &vm_info->ledger_tag_neural_footprint_compressed);
5676 			ledger_get_balance(task->ledger,
5677 			    task_ledgers.neural_nofootprint,
5678 			    &vm_info->ledger_tag_neural_nofootprint);
5679 			ledger_get_balance(task->ledger,
5680 			    task_ledgers.neural_nofootprint_compressed,
5681 			    &vm_info->ledger_tag_neural_nofootprint_compressed);
5682 			*task_info_count = TASK_VM_INFO_REV3_COUNT;
5683 		}
5684 		if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5685 			if (get_bsdtask_info(task)) {
5686 				vm_info->limit_bytes_remaining =
5687 				    memorystatus_available_memory_internal(get_bsdtask_info(task));
5688 			} else {
5689 				vm_info->limit_bytes_remaining = 0;
5690 			}
5691 			*task_info_count = TASK_VM_INFO_REV4_COUNT;
5692 		}
5693 		if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5694 			thread_t thread;
5695 			uint64_t total = task->decompressions;
5696 			queue_iterate(&task->threads, thread, thread_t, task_threads) {
5697 				total += thread->decompressions;
5698 			}
5699 			vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
5700 			*task_info_count = TASK_VM_INFO_REV5_COUNT;
5701 		}
5702 		if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
5703 			ledger_get_balance(task->ledger, task_ledgers.swapins,
5704 			    &vm_info->ledger_swapins);
5705 			*task_info_count = TASK_VM_INFO_REV6_COUNT;
5706 		}
5707 
5708 		break;
5709 	}
5710 
5711 	case TASK_WAIT_STATE_INFO:
5712 	{
5713 		/*
5714 		 * Deprecated flavor. Currently allowing some results until all users
5715 		 * stop calling it. The results may not be accurate.
5716 		 */
5717 		task_wait_state_info_t  wait_state_info;
5718 		uint64_t total_sfi_ledger_val = 0;
5719 
5720 		if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5721 			error = KERN_INVALID_ARGUMENT;
5722 			break;
5723 		}
5724 
5725 		wait_state_info = (task_wait_state_info_t) task_info_out;
5726 
5727 		wait_state_info->total_wait_state_time = 0;
5728 		bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5729 
5730 #if CONFIG_SCHED_SFI
5731 		int i, prev_lentry = -1;
5732 		int64_t  val_credit, val_debit;
5733 
5734 		for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5735 			val_credit = 0;
5736 			/*
5737 			 * checking with prev_lentry != entry ensures adjacent classes
5738 			 * which share the same ledger do not add wait times twice.
5739 			 * Note: Use ledger() call to get data for each individual sfi class.
5740 			 */
5741 			if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5742 			    KERN_SUCCESS == ledger_get_entries(task->ledger,
5743 			    task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5744 				total_sfi_ledger_val += val_credit;
5745 			}
5746 			prev_lentry = task_ledgers.sfi_wait_times[i];
5747 		}
5748 
5749 #endif /* CONFIG_SCHED_SFI */
5750 		wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5751 		*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5752 
5753 		break;
5754 	}
5755 	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5756 	{
5757 #if DEVELOPMENT || DEBUG
5758 		pvm_account_info_t      acnt_info;
5759 
5760 		if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5761 			error = KERN_INVALID_ARGUMENT;
5762 			break;
5763 		}
5764 
5765 		if (task_info_out == NULL) {
5766 			error = KERN_INVALID_ARGUMENT;
5767 			break;
5768 		}
5769 
5770 		acnt_info = (pvm_account_info_t) task_info_out;
5771 
5772 		error = vm_purgeable_account(task, acnt_info);
5773 
5774 		*task_info_count = PVM_ACCOUNT_INFO_COUNT;
5775 
5776 		break;
5777 #else /* DEVELOPMENT || DEBUG */
5778 		error = KERN_NOT_SUPPORTED;
5779 		break;
5780 #endif /* DEVELOPMENT || DEBUG */
5781 	}
5782 	case TASK_FLAGS_INFO:
5783 	{
5784 		task_flags_info_t               flags_info;
5785 
5786 		if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5787 			error = KERN_INVALID_ARGUMENT;
5788 			break;
5789 		}
5790 
5791 		flags_info = (task_flags_info_t)task_info_out;
5792 
5793 		/* only publish the 64-bit flag of the task */
5794 		flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5795 
5796 		*task_info_count = TASK_FLAGS_INFO_COUNT;
5797 		break;
5798 	}
5799 
5800 	case TASK_DEBUG_INFO_INTERNAL:
5801 	{
5802 #if DEVELOPMENT || DEBUG
5803 		task_debug_info_internal_t dbg_info;
5804 		ipc_space_t space = task->itk_space;
5805 		if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5806 			error = KERN_NOT_SUPPORTED;
5807 			break;
5808 		}
5809 
5810 		if (task_info_out == NULL) {
5811 			error = KERN_INVALID_ARGUMENT;
5812 			break;
5813 		}
5814 		dbg_info = (task_debug_info_internal_t) task_info_out;
5815 		dbg_info->ipc_space_size = 0;
5816 
5817 		if (space) {
5818 			smr_global_enter();
5819 			ipc_entry_table_t table = smr_entered_load(&space->is_table);
5820 			if (table) {
5821 				dbg_info->ipc_space_size =
5822 				    ipc_entry_table_count(table);
5823 			}
5824 			smr_global_leave();
5825 		}
5826 
5827 		dbg_info->suspend_count = task->suspend_count;
5828 
5829 		error = KERN_SUCCESS;
5830 		*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5831 		break;
5832 #else /* DEVELOPMENT || DEBUG */
5833 		error = KERN_NOT_SUPPORTED;
5834 		break;
5835 #endif /* DEVELOPMENT || DEBUG */
5836 	}
5837 	default:
5838 		error = KERN_INVALID_ARGUMENT;
5839 	}
5840 
5841 	task_unlock(task);
5842 	return error;
5843 }
5844 
5845 /*
5846  * task_info_from_user
5847  *
5848  * When calling task_info from user space,
5849  * this function will be executed as mig server side
5850  * instead of calling directly into task_info.
5851  * This gives the possibility to perform more security
5852  * checks on task_port.
5853  *
5854  * In the case of TASK_DYLD_INFO, we require the more
5855  * privileged task_read_port not the less-privileged task_name_port.
5856  *
5857  */
5858 kern_return_t
task_info_from_user(mach_port_t task_port,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)5859 task_info_from_user(
5860 	mach_port_t             task_port,
5861 	task_flavor_t           flavor,
5862 	task_info_t             task_info_out,
5863 	mach_msg_type_number_t  *task_info_count)
5864 {
5865 	task_t task;
5866 	kern_return_t ret;
5867 
5868 	if (flavor == TASK_DYLD_INFO) {
5869 		task = convert_port_to_task_read(task_port);
5870 	} else {
5871 		task = convert_port_to_task_name(task_port);
5872 	}
5873 
5874 	ret = task_info(task, flavor, task_info_out, task_info_count);
5875 
5876 	task_deallocate(task);
5877 
5878 	return ret;
5879 }
5880 
5881 /*
5882  * Routine: task_dyld_process_info_update_helper
5883  *
5884  * Release send rights in release_ports.
5885  *
5886  * If no active ports found in task's dyld notifier array, unset the magic value
5887  * in user space to indicate so.
5888  *
5889  * Condition:
5890  *      task's itk_lock is locked, and is unlocked upon return.
5891  *      Global g_dyldinfo_mtx is locked, and is unlocked upon return.
5892  */
void
task_dyld_process_info_update_helper(
	task_t                  task,
	size_t                  active_count,
	vm_map_address_t        magic_addr,    /* a userspace address */
	ipc_port_t             *release_ports,
	size_t                  release_count)
{
	void *notifiers_ptr = NULL;

	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);

	if (active_count == 0) {
		/*
		 * No active notifier ports remain: reclaim the notifier array
		 * and clear the magic value in the task's address space so
		 * user space can tell that nothing is registered anymore.
		 */
		assert(task->itk_dyld_notify != NULL);
		notifiers_ptr = task->itk_dyld_notify;
		task->itk_dyld_notify = NULL;
		itk_unlock(task);

		kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
		(void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
	} else {
		itk_unlock(task);
		(void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
		    magic_addr);     /* reset magic */
	}

	lck_mtx_unlock(&g_dyldinfo_mtx);

	/*
	 * Release the collected send rights only after all locks are
	 * dropped; ipc_port_release_send() may take other IPC locks.
	 */
	for (size_t i = 0; i < release_count; i++) {
		ipc_port_release_send(release_ports[i]);
	}
}
5925 
5926 /*
5927  * Routine: task_dyld_process_info_notify_register
5928  *
5929  * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
5930  * memory for the array if it's the first port to be registered. Also cleanup
5931  * any dead rights found in the array.
5932  *
5933  * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
5934  *
5935  * Args:
5936  *     task:   Target task for the registration.
5937  *     sright: A send right.
5938  *
5939  * Returns:
5940  *     KERN_SUCCESS: Registration succeeded.
5941  *     KERN_INVALID_TASK: task is invalid.
5942  *     KERN_INVALID_RIGHT: sright is invalid.
5943  *     KERN_DENIED: Security policy denied this call.
5944  *     KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
5945  *     KERN_NO_SPACE: No available notifier port slot left for this task.
5946  *     KERN_RIGHT_EXISTS: The notifier port is already registered and active.
5947  *
5948  *     Other error code see task_info().
5949  *
5950  * See Also:
5951  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
5952  */
kern_return_t
task_dyld_process_info_notify_register(
	task_t                  task,
	ipc_port_t              sright)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	mach_vm_address_t ports_addr; /* a user space address */
	kern_return_t kr;
	boolean_t right_exists = false;
	ipc_port_t *notifiers_ptr = NULL;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!IP_VALID(sright)) {
		return KERN_INVALID_RIGHT;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* Locate the target task's dyld all_image_infos structure. */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/* Compute the userspace address of the notifyMachPorts magic field. */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/*
	 * Pre-allocate the notifier array before taking any locks, in case
	 * this is the first registration for the task.  The unlocked check
	 * may race with a concurrent registration; it is re-checked under
	 * the locks below and any unused allocation is freed on exit.
	 */
	if (task->itk_dyld_notify == NULL) {
		notifiers_ptr = kalloc_type(ipc_port_t,
		    DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	}

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		task->itk_dyld_notify = notifiers_ptr;
		notifiers_ptr = NULL; /* ownership transferred to the task */
	}

	assert(task->itk_dyld_notify != NULL);
	/* First pass: clear dead names and check for duplicate registration */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
			/* collect the dead right for release after unlocking */
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		} else if (*portp == sright) {
			/* the port is already registered and is active */
			right_exists = true;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	if (right_exists) {
		/* skip second pass */
		kr = KERN_RIGHT_EXISTS;
		goto out;
	}

	/* Second pass: register the port in the first free slot */
	kr = KERN_NO_SPACE;
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == IPC_PORT_NULL) {
			*portp = sright; /* consumes the caller's send right */
			active_count++;
			kr = KERN_SUCCESS;
			break;
		}
	}

out:
	assert(active_count > 0);

	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* free the pre-allocated array if we lost the race to install it */
	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);

	return kr;
}
6055 
6056 /*
6057  * Routine: task_dyld_process_info_notify_deregister
6058  *
6059  * Remove a send right in target task's itk_dyld_notify array matching the receive
6060  * right name passed in. Deallocate kernel memory for the array if it's the last port to
6061  * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
6062  *
6063  * Does not consume any reference.
6064  *
6065  * Args:
6066  *     task: Target task for the deregistration.
6067  *     rcv_name: The name denoting the receive right in caller's space.
6068  *
6069  * Returns:
 *     KERN_SUCCESS: A matching entry was found and deregistration succeeded.
6071  *     KERN_INVALID_TASK: task is invalid.
6072  *     KERN_INVALID_NAME: name is invalid.
6073  *     KERN_DENIED: Security policy denied this call.
6074  *     KERN_FAILURE: A matching entry is not found.
6075  *     KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6076  *
6077  *     Other error code see task_info().
6078  *
6079  * See Also:
6080  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6081  */
kern_return_t
task_dyld_process_info_notify_deregister(
	task_t                  task,
	mach_port_name_t        rcv_name)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	boolean_t port_found = false;
	mach_vm_address_t ports_addr; /* a user space address */
	ipc_port_t sright;
	kern_return_t kr;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!MACH_PORT_VALID(rcv_name)) {
		return KERN_INVALID_NAME;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* Locate the target task's dyld all_image_infos structure. */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/* Compute the userspace address of the notifyMachPorts magic field. */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/*
	 * Translate the caller's receive-right name into the port it
	 * denotes, then take our own port reference so the comparison
	 * below stays valid after the port lock is dropped.
	 */
	kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
	if (kr) {
		return KERN_INVALID_RIGHT;
	}

	ip_reference(sright);
	ip_mq_unlock(sright);

	assert(sright != IPC_PORT_NULL);

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		/* nothing was ever registered for this task */
		itk_unlock(task);
		lck_mtx_unlock(&g_dyldinfo_mtx);
		ip_release(sright);
		return KERN_FAILURE;
	}

	/* Single pass: drop the matching entry plus any dead rights. */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == sright) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
			port_found = true;
		} else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	ip_release(sright); /* drop the reference taken above */

	return port_found ? KERN_SUCCESS : KERN_FAILURE;
}
6168 
6169 /*
6170  *	task_power_info
6171  *
6172  *	Returns power stats for the task.
6173  *	Note: Called with task locked.
6174  */
void
task_power_info_locked(
	task_t                        task,
	task_power_info_t             info,
	gpu_energy_data_t             ginfo,
	task_power_info_v2_t          infov2,
	struct task_power_info_extra *extra_info)
{
	thread_t                thread;
	ledger_amount_t         tmp;

	uint64_t                runnable_time_sum = 0;

	task_lock_assert_owned(task);

	/* Wakeup counts come from the task ledgers (credit side only). */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	/*
	 * Seed the timer-wakeup bins with the task-level counters
	 * (presumably rolled up from terminated threads — per-thread
	 * values are added in the loop below; confirm against the
	 * thread-termination path).
	 */
	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	/* CPU time, split into overall usage and P-core-only usage. */
	struct recount_usage usage = { 0 };
	struct recount_usage usage_perf = { 0 };
	recount_task_usage_perf_only(task, &usage, &usage_perf);

	info->total_user = usage.ru_user_time_mach;
	info->total_system = usage.ru_system_time_mach;
	runnable_time_sum = task->total_runnable_time;

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		/* task_ptime: total time attributed to performance cores */
		infov2->task_ptime = usage_perf.ru_system_time_mach +
		    usage_perf.ru_user_time_mach;
		infov2->task_pset_switches = task->ps_switch;
#if CONFIG_PERVASIVE_ENERGY
		infov2->task_energy = usage.ru_energy_nj;
#endif /* CONFIG_PERVASIVE_ENERGY */
	}

	/* Add the contribution of each live, non-idle thread. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;

		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		/* per-thread counters are sampled under the thread lock */
		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

		if (infov2) {
			infov2->task_pset_switches += thread->ps_switch;
		}

		runnable_time_sum += timer_grab(&thread->runnable_timer);

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}

	/* Optional extended stats (runnable time, CPI, energy counters). */
	if (extra_info) {
		extra_info->runnable_time = runnable_time_sum;
#if CONFIG_PERVASIVE_CPI
		extra_info->cycles = usage.ru_cycles;
		extra_info->instructions = usage.ru_instructions;
		extra_info->pcycles = usage_perf.ru_cycles;
		extra_info->pinstructions = usage_perf.ru_instructions;
		extra_info->user_ptime = usage_perf.ru_user_time_mach;
		extra_info->system_ptime = usage_perf.ru_system_time_mach;
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
		extra_info->energy = usage.ru_energy_nj;
		extra_info->penergy = usage_perf.ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	}
}
6261 
6262 /*
6263  *	task_gpu_utilisation
6264  *
 *	Returns the total gpu time used by all the threads of the task
6266  *  (both dead and alive)
6267  */
6268 uint64_t
task_gpu_utilisation(task_t task)6269 task_gpu_utilisation(
6270 	task_t  task)
6271 {
6272 	uint64_t gpu_time = 0;
6273 #if defined(__x86_64__)
6274 	thread_t thread;
6275 
6276 	task_lock(task);
6277 	gpu_time += task->task_gpu_ns;
6278 
6279 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6280 		spl_t x;
6281 		x = splsched();
6282 		thread_lock(thread);
6283 		gpu_time += ml_gpu_stat(thread);
6284 		thread_unlock(thread);
6285 		splx(x);
6286 	}
6287 
6288 	task_unlock(task);
6289 #else /* defined(__x86_64__) */
6290 	/* silence compiler warning */
6291 	(void)task;
6292 #endif /* defined(__x86_64__) */
6293 	return gpu_time;
6294 }
6295 
6296 /* This function updates the cpu time in the arrays for each
6297  * effective and requested QoS class
6298  */
6299 void
task_update_cpu_time_qos_stats(task_t task,uint64_t * eqos_stats,uint64_t * rqos_stats)6300 task_update_cpu_time_qos_stats(
6301 	task_t  task,
6302 	uint64_t *eqos_stats,
6303 	uint64_t *rqos_stats)
6304 {
6305 	if (!eqos_stats && !rqos_stats) {
6306 		return;
6307 	}
6308 
6309 	task_lock(task);
6310 	thread_t thread;
6311 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6312 		if (thread->options & TH_OPT_IDLE_THREAD) {
6313 			continue;
6314 		}
6315 
6316 		thread_update_qos_cpu_time(thread);
6317 	}
6318 
6319 	if (eqos_stats) {
6320 		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
6321 		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
6322 		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
6323 		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
6324 		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
6325 		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
6326 		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
6327 	}
6328 
6329 	if (rqos_stats) {
6330 		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
6331 		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
6332 		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
6333 		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
6334 		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
6335 		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
6336 		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
6337 	}
6338 
6339 	task_unlock(task);
6340 }
6341 
6342 kern_return_t
task_purgable_info(task_t task,task_purgable_info_t * stats)6343 task_purgable_info(
6344 	task_t                  task,
6345 	task_purgable_info_t    *stats)
6346 {
6347 	if (task == TASK_NULL || stats == NULL) {
6348 		return KERN_INVALID_ARGUMENT;
6349 	}
6350 	/* Take task reference */
6351 	task_reference(task);
6352 	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6353 	/* Drop task reference */
6354 	task_deallocate(task);
6355 	return KERN_SUCCESS;
6356 }
6357 
/*
 * Arm the given vtimer type on the task.
 *
 * Records the bit in task->vtimers, then snapshots the relevant CPU-time
 * baseline into each thread so task_vtimer_update() can later compute the
 * elapsed delta.  Thread times are read under the thread lock at splsched.
 */
void
task_vtimer_set(
	task_t          task,
	integer_t       which)
{
	thread_t        thread;
	spl_t           x;

	task_lock(task);

	/* Mark this vtimer as armed on the task. */
	task->vtimers |= which;

	switch (which) {
	case TASK_VTIMER_USER:
		/* Baseline is user-mode CPU time only. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			struct recount_times_mach times = recount_thread_times(thread);
			thread->vtimer_user_save = times.rtm_user;
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_PROF:
		/* Baseline is total (user + system) CPU time. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_prof_save = recount_thread_time_mach(thread);
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_RLIM:
		/* Baseline is total (user + system) CPU time. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_rlim_save = recount_thread_time_mach(thread);
			thread_unlock(thread);
			splx(x);
		}
		break;
	}

	task_unlock(task);
}
6405 
/*
 * Disarm the given vtimer type(s) on the task.
 *
 * The per-thread baselines saved by task_vtimer_set() are left in place;
 * task_vtimer_update() checks task->vtimers before using them.
 */
void
task_vtimer_clear(
	task_t          task,
	integer_t       which)
{
	task_lock(task);

	task->vtimers &= ~which;

	task_unlock(task);
}
6417 
/*
 * Report, via *microsecs, how much CPU time the current thread has
 * accumulated on the given vtimer since its saved baseline, and advance
 * the baseline.  Must be called on the task being measured.
 *
 * If not all bits of `which' are armed on the task, returns without
 * touching *microsecs.
 */
void
task_vtimer_update(
	__unused
	task_t          task,
	integer_t       which,
	uint32_t        *microsecs)
{
	thread_t        thread = current_thread();
	uint32_t        tdelt = 0;
	clock_sec_t     secs = 0;
	uint64_t        tsum;

	assert(task == current_task());

	spl_t s = splsched();
	thread_lock(thread);

	if ((task->vtimers & which) != (uint32_t)which) {
		/* Timer not (fully) armed; leave the output untouched. */
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {
	case TASK_VTIMER_USER:;
		/* User-mode time only. */
		struct recount_times_mach times = recount_thread_times(thread);
		tsum = times.rtm_user;
		/* NOTE(review): delta is truncated to 32 bits of mach time —
		 * presumably updates are frequent enough for it to fit; confirm. */
		tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
		thread->vtimer_user_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0) {
			thread->vtimer_prof_save = tsum;
		}
		break;

	case TASK_VTIMER_RLIM:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}
6471 
/* Accessor: the task's recorded dispatchqueue_offset (set elsewhere). */
uint64_t
get_task_dispatchqueue_offset(
	task_t          task)
{
	return task->dispatchqueue_offset;
}
6478 
/* Tear down all synchronizers owned by the task (currently semaphores only). */
void
task_synchronizer_destroy_all(task_t task)
{
	/*
	 *  Destroy owned semaphores
	 */
	semaphore_destroy_all(task);
}
6487 
6488 /*
6489  * Install default (machine-dependent) initial thread state
6490  * on the task.  Subsequent thread creation will have this initial
6491  * state set on the thread by machine_thread_inherit_taskwide().
6492  * Flavors and structures are exactly the same as those to thread_set_state()
6493  */
6494 kern_return_t
task_set_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t state_count)6495 task_set_state(
6496 	task_t task,
6497 	int flavor,
6498 	thread_state_t state,
6499 	mach_msg_type_number_t state_count)
6500 {
6501 	kern_return_t ret;
6502 
6503 	if (task == TASK_NULL) {
6504 		return KERN_INVALID_ARGUMENT;
6505 	}
6506 
6507 	task_lock(task);
6508 
6509 	if (!task->active) {
6510 		task_unlock(task);
6511 		return KERN_FAILURE;
6512 	}
6513 
6514 	ret = machine_task_set_state(task, flavor, state, state_count);
6515 
6516 	task_unlock(task);
6517 	return ret;
6518 }
6519 
6520 /*
6521  * Examine the default (machine-dependent) initial thread state
6522  * on the task, as set by task_set_state().  Flavors and structures
6523  * are exactly the same as those passed to thread_get_state().
6524  */
6525 kern_return_t
task_get_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)6526 task_get_state(
6527 	task_t  task,
6528 	int     flavor,
6529 	thread_state_t state,
6530 	mach_msg_type_number_t *state_count)
6531 {
6532 	kern_return_t ret;
6533 
6534 	if (task == TASK_NULL) {
6535 		return KERN_INVALID_ARGUMENT;
6536 	}
6537 
6538 	task_lock(task);
6539 
6540 	if (!task->active) {
6541 		task_unlock(task);
6542 		return KERN_FAILURE;
6543 	}
6544 
6545 	ret = machine_task_get_state(task, flavor, state, state_count);
6546 
6547 	task_unlock(task);
6548 	return ret;
6549 }
6550 
6551 
/*
 * Deliver an EXC_GUARD exception (with a corpse) to the current task.
 *
 * The task is temporarily marked memstat-skip so jetsam does not kill it
 * while the corpse is collected (see the jetsam comments further below).
 * initproc (pid 1) is never sent a guard exception.
 */
static kern_return_t __attribute__((noinline, not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	boolean_t backtrace_only)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid()) {
		return KERN_NOT_SUPPORTED;              // initproc is immune
	}
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;
	void *bsd_info = get_bsdtask_info(task);

	/* (See jetsam-related comments below) */

	proc_memstat_skip(bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason, backtrace_only);
	proc_memstat_skip(bsd_info, FALSE);
	return kr;
}
6579 
/*
 * Public entry point: report a guard violation by the current task by
 * sending an EXC_GUARD exception (see the static helper above).
 */
kern_return_t
task_violated_guard(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	bool backtrace_only)
{
	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
}
6589 
6590 
6591 #if CONFIG_MEMORYSTATUS
6592 
6593 boolean_t
task_get_memlimit_is_active(task_t task)6594 task_get_memlimit_is_active(task_t task)
6595 {
6596 	assert(task != NULL);
6597 
6598 	if (task->memlimit_is_active == 1) {
6599 		return TRUE;
6600 	} else {
6601 		return FALSE;
6602 	}
6603 }
6604 
6605 void
task_set_memlimit_is_active(task_t task,boolean_t memlimit_is_active)6606 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6607 {
6608 	assert(task != NULL);
6609 
6610 	if (memlimit_is_active) {
6611 		task->memlimit_is_active = 1;
6612 	} else {
6613 		task->memlimit_is_active = 0;
6614 	}
6615 }
6616 
6617 boolean_t
task_get_memlimit_is_fatal(task_t task)6618 task_get_memlimit_is_fatal(task_t task)
6619 {
6620 	assert(task != NULL);
6621 
6622 	if (task->memlimit_is_fatal == 1) {
6623 		return TRUE;
6624 	} else {
6625 		return FALSE;
6626 	}
6627 }
6628 
6629 void
task_set_memlimit_is_fatal(task_t task,boolean_t memlimit_is_fatal)6630 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6631 {
6632 	assert(task != NULL);
6633 
6634 	if (memlimit_is_fatal) {
6635 		task->memlimit_is_fatal = 1;
6636 	} else {
6637 		task->memlimit_is_fatal = 0;
6638 	}
6639 }
6640 
/*
 * Return the task's memorystatus dirty-start timestamp.
 * Read without the task lock; the setter below does take it.
 */
uint64_t
task_get_dirty_start(task_t task)
{
	return task->memstat_dirty_start;
}
6646 
/* Record the memorystatus dirty-start timestamp under the task lock. */
void
task_set_dirty_start(task_t task, uint64_t start)
{
	task_lock(task);
	task->memstat_dirty_start = start;
	task_unlock(task);
}
6654 
6655 boolean_t
task_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6656 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6657 {
6658 	boolean_t triggered = FALSE;
6659 
6660 	assert(task == current_task());
6661 
6662 	/*
6663 	 * Returns true, if task has already triggered an exc_resource exception.
6664 	 */
6665 
6666 	if (memlimit_is_active) {
6667 		triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6668 	} else {
6669 		triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6670 	}
6671 
6672 	return triggered;
6673 }
6674 
6675 void
task_mark_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6676 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6677 {
6678 	assert(task == current_task());
6679 
6680 	/*
6681 	 * We allow one exc_resource per process per active/inactive limit.
6682 	 * The limit's fatal attribute does not come into play.
6683 	 */
6684 
6685 	if (memlimit_is_active) {
6686 		task->memlimit_active_exc_resource = 1;
6687 	} else {
6688 		task->memlimit_inactive_exc_resource = 1;
6689 	}
6690 }
6691 
6692 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6693 
6694 void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb,boolean_t is_fatal)6695 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6696 {
6697 	task_t                                          task            = current_task();
6698 	int                                                     pid         = 0;
6699 	const char                                      *procname       = "unknown";
6700 	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6701 	boolean_t send_sync_exc_resource = FALSE;
6702 	void *cur_bsd_info = get_bsdtask_info(current_task());
6703 
6704 #ifdef MACH_BSD
6705 	pid = proc_selfpid();
6706 
6707 	if (pid == 1) {
6708 		/*
6709 		 * Cannot have ReportCrash analyzing
6710 		 * a suspended initproc.
6711 		 */
6712 		return;
6713 	}
6714 
6715 	if (cur_bsd_info != NULL) {
6716 		procname = proc_name_address(cur_bsd_info);
6717 		send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(cur_bsd_info);
6718 	}
6719 #endif
6720 #if CONFIG_COREDUMP
6721 	if (hwm_user_cores) {
6722 		int                             error;
6723 		uint64_t                starttime, end;
6724 		clock_sec_t             secs = 0;
6725 		uint32_t                microsecs = 0;
6726 
6727 		starttime = mach_absolute_time();
6728 		/*
6729 		 * Trigger a coredump of this process. Don't proceed unless we know we won't
6730 		 * be filling up the disk; and ignore the core size resource limit for this
6731 		 * core file.
6732 		 */
6733 		if ((error = coredump(cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6734 			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6735 		}
6736 		/*
6737 		 * coredump() leaves the task suspended.
6738 		 */
6739 		task_resume_internal(current_task());
6740 
6741 		end = mach_absolute_time();
6742 		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6743 		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6744 		    proc_name_address(cur_bsd_info), pid, (int)secs, microsecs);
6745 	}
6746 #endif /* CONFIG_COREDUMP */
6747 
6748 	if (disable_exc_resource) {
6749 		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6750 		    "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6751 		return;
6752 	}
6753 
6754 	/*
6755 	 * A task that has triggered an EXC_RESOURCE, should not be
6756 	 * jetsammed when the device is under memory pressure.  Here
6757 	 * we set the P_MEMSTAT_SKIP flag so that the process
6758 	 * will be skipped if the memorystatus_thread wakes up.
6759 	 *
6760 	 * This is a debugging aid to ensure we can get a corpse before
6761 	 * the jetsam thread kills the process.
6762 	 * Note that proc_memstat_skip is a no-op on release kernels.
6763 	 */
6764 	proc_memstat_skip(cur_bsd_info, TRUE);
6765 
6766 	code[0] = code[1] = 0;
6767 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6768 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6769 	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6770 
6771 	/*
6772 	 * Do not generate a corpse fork if the violation is a fatal one
6773 	 * or the process wants synchronous EXC_RESOURCE exceptions.
6774 	 */
6775 	if (is_fatal || send_sync_exc_resource || !exc_via_corpse_forking) {
6776 		/* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6777 		if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
6778 			/*
6779 			 * Use the _internal_ variant so that no user-space
6780 			 * process can resume our task from under us.
6781 			 */
6782 			task_suspend_internal(task);
6783 			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6784 			task_resume_internal(task);
6785 		}
6786 	} else {
6787 		if (audio_active) {
6788 			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6789 			    "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6790 		} else {
6791 			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6792 			    code, EXCEPTION_CODE_MAX, NULL, FALSE);
6793 		}
6794 	}
6795 
6796 	/*
6797 	 * After the EXC_RESOURCE has been handled, we must clear the
6798 	 * P_MEMSTAT_SKIP flag so that the process can again be
6799 	 * considered for jetsam if the memorystatus_thread wakes up.
6800 	 */
6801 	proc_memstat_skip(cur_bsd_info, FALSE);         /* clear the flag */
6802 }
6803 
6804 /*
6805  * Callback invoked when a task exceeds its physical footprint limit.
6806  */
void
task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	ledger_amount_t max_footprint, max_footprint_mb;
#if CONFIG_DEFERRED_RECLAIM
	ledger_amount_t current_footprint;
#endif /* CONFIG_DEFERRED_RECLAIM */
	task_t task;
	boolean_t is_warning;
	boolean_t memlimit_is_active;
	boolean_t memlimit_is_fatal;

	/* Classify the ledger event: warning-on-the-way-up vs. real violation. */
	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
		/*
		 * Task memory limits only provide a warning on the way up.
		 */
		return;
	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
		/*
		 * This task is in danger of violating a memory limit,
		 * It has exceeded a percentage level of the limit.
		 */
		is_warning = TRUE;
	} else {
		/*
		 * The task has exceeded the physical footprint limit.
		 * This is not a warning but a true limit violation.
		 */
		is_warning = FALSE;
	}

	task = current_task();

	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		/*
		 * Task is enrolled in deferred reclamation.
		 * Do a reclaim to ensure it's really over its limit.
		 */
		vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
		ledger_get_balance(task->ledger, task_ledgers.phys_footprint, &current_footprint);
		if (current_footprint < max_footprint) {
			/* Reclaim brought the task back under its limit; no action. */
			return;
		}
	}
#endif /* CONFIG_DEFERRED_RECLAIM */
	max_footprint_mb = max_footprint >> 20; /* bytes to MB */

	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task);

	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * We only generate the exception once per process per memlimit (active/inactive limit).
	 * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
	 * and we disable it by marking that memlimit as exception triggered.
	 */
	if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
		memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
	}

	/* Let memorystatus/jetsam react to the event (warnings included). */
	memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
}
6873 
6874 extern int proc_check_footprint_priv(void);
6875 
6876 kern_return_t
task_set_phys_footprint_limit(task_t task,int new_limit_mb,int * old_limit_mb)6877 task_set_phys_footprint_limit(
6878 	task_t task,
6879 	int new_limit_mb,
6880 	int *old_limit_mb)
6881 {
6882 	kern_return_t error;
6883 
6884 	boolean_t memlimit_is_active;
6885 	boolean_t memlimit_is_fatal;
6886 
6887 	if ((error = proc_check_footprint_priv())) {
6888 		return KERN_NO_ACCESS;
6889 	}
6890 
6891 	/*
6892 	 * This call should probably be obsoleted.
6893 	 * But for now, we default to current state.
6894 	 */
6895 	memlimit_is_active = task_get_memlimit_is_active(task);
6896 	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6897 
6898 	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6899 }
6900 
6901 kern_return_t
task_convert_phys_footprint_limit(int limit_mb,int * converted_limit_mb)6902 task_convert_phys_footprint_limit(
6903 	int limit_mb,
6904 	int *converted_limit_mb)
6905 {
6906 	if (limit_mb == -1) {
6907 		/*
6908 		 * No limit
6909 		 */
6910 		if (max_task_footprint != 0) {
6911 			*converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);         /* bytes to MB */
6912 		} else {
6913 			*converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6914 		}
6915 	} else {
6916 		/* nothing to convert */
6917 		*converted_limit_mb = limit_mb;
6918 	}
6919 	return KERN_SUCCESS;
6920 }
6921 
6922 
/*
 * Set (or remove, with new_limit_mb == -1) the task's physical footprint
 * ledger limit, updating the memlimit active/fatal attributes alongside.
 * Returns any error from reading the current ledger limit; otherwise
 * KERN_SUCCESS.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal)
{
	ledger_amount_t old;
	kern_return_t ret;

	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

	if (old_limit_mb) {
		*old_limit_mb = (int)(old >> 20);
	}

	if (new_limit_mb == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
		    max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);

		task_lock(task);
		task_set_memlimit_is_active(task, memlimit_is_active);
		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
		task_unlock(task);

		return KERN_SUCCESS;
	}

	/* With monitors compiled out, only limit removal (above) is honored. */
#ifdef CONFIG_NOMONITORS
	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

	task_lock(task);

	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
		/*
		 * memlimit state is not changing
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	task_set_memlimit_is_active(task, memlimit_is_active);
	task_set_memlimit_is_fatal(task, memlimit_is_fatal);

	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

	/* The new, lower limit may already be exceeded by the caller itself. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);

	return KERN_SUCCESS;
}
6998 
6999 kern_return_t
task_get_phys_footprint_limit(task_t task,int * limit_mb)7000 task_get_phys_footprint_limit(
7001 	task_t task,
7002 	int *limit_mb)
7003 {
7004 	ledger_amount_t limit;
7005 	kern_return_t ret;
7006 
7007 	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
7008 	if (ret != KERN_SUCCESS) {
7009 		return ret;
7010 	}
7011 
7012 	/*
7013 	 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
7014 	 * result. There are, however, implicit assumptions that -1 mb limit
7015 	 * equates to LEDGER_LIMIT_INFINITY.
7016 	 */
7017 	assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
7018 	*limit_mb = (int)(limit >> 20);
7019 
7020 	return KERN_SUCCESS;
7021 }
7022 #else /* CONFIG_MEMORYSTATUS */
/* CONFIG_MEMORYSTATUS disabled: footprint limits are unsupported. */
kern_return_t
task_set_phys_footprint_limit(
	__unused task_t task,
	__unused int new_limit_mb,
	__unused int *old_limit_mb)
{
	return KERN_FAILURE;
}
7031 
/* CONFIG_MEMORYSTATUS disabled: footprint limits are unsupported. */
kern_return_t
task_get_phys_footprint_limit(
	__unused task_t task,
	__unused int *limit_mb)
{
	return KERN_FAILURE;
}
7039 #endif /* CONFIG_MEMORYSTATUS */
7040 
/* Return a pointer to the task's security token (read-only task data). */
security_token_t *
task_get_sec_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.sec_token;
}
7046 
/* Update the task's security token; RO data must go through zalloc_ro. */
void
task_set_sec_token(task_t task, security_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.sec_token, token);
}
7053 
/* Return a pointer to the task's audit token (read-only task data). */
audit_token_t *
task_get_audit_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.audit_token;
}
7059 
/* Update the task's audit token; RO data must go through zalloc_ro. */
void
task_set_audit_token(task_t task, audit_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.audit_token, token);
}
7066 
7067 void
task_set_tokens(task_t task,security_token_t * sec_token,audit_token_t * audit_token)7068 task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
7069 {
7070 	struct task_token_ro_data tokens;
7071 
7072 	tokens = task_get_ro(task)->task_tokens;
7073 	tokens.sec_token = *sec_token;
7074 	tokens.audit_token = *audit_token;
7075 
7076 	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
7077 	    &tokens);
7078 }
7079 
/* A task is "privileged" when its security token's first value is 0. */
boolean_t
task_is_privileged(task_t task)
{
	return task_get_sec_token(task)->val[0] == 0;
}
7085 
7086 #ifdef CONFIG_MACF
/* Return the task's Mach trap filter bitmask (read-only task data). */
uint8_t *
task_get_mach_trap_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_trap_filter_mask;
}
7092 
/* Set the task's Mach trap filter bitmask; RO data goes through zalloc_ro. */
void
task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_trap_filter_mask, &mask);
}
7099 
/* Return the task's Mach kobject filter bitmask (read-only task data). */
uint8_t *
task_get_mach_kobj_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_kobj_filter_mask;
}
7105 
/* Accessor for the task's recorded all_image_info address. */
mach_vm_address_t
task_get_all_image_info_addr(task_t task)
{
	return task->all_image_info_addr;
}
7111 
/* Set the task's Mach kobject filter bitmask; RO data goes through zalloc_ro. */
void
task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_kobj_filter_mask, &mask);
}
7118 
7119 #endif /* CONFIG_MACF */
7120 
7121 void
task_set_thread_limit(task_t task,uint16_t thread_limit)7122 task_set_thread_limit(task_t task, uint16_t thread_limit)
7123 {
7124 	assert(task != kernel_task);
7125 	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7126 		task_lock(task);
7127 		task->task_thread_limit = thread_limit;
7128 		task_unlock(task);
7129 	}
7130 }
7131 
7132 #if CONFIG_PROC_RESOURCE_LIMITS
/* Forward the port-space table size limits to the task's IPC space. */
kern_return_t
task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
{
	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
}
7138 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7139 
7140 #if XNU_TARGET_OS_OSX
/*
 * Report whether the TF_SYS_VERSION_COMPAT flag is set on the task.
 * (The returned value is the raw flag bit, i.e. nonzero when enabled.)
 */
boolean_t
task_has_system_version_compat_enabled(task_t task)
{
	boolean_t enabled = FALSE;

	/* Read the flag under the task lock for a consistent snapshot. */
	task_lock(task);
	enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
	task_unlock(task);

	return enabled;
}
7152 
/*
 * Set or clear the TF_SYS_VERSION_COMPAT flag on the current task.
 * Only valid on the calling task, never on the kernel task.
 */
void
task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
{
	assert(task == current_task());
	assert(task != kernel_task);

	task_lock(task);
	if (enable_system_version_compat) {
		task->t_flags |= TF_SYS_VERSION_COMPAT;
	} else {
		task->t_flags &= ~TF_SYS_VERSION_COMPAT;
	}
	task_unlock(task);
}
7167 #endif /* XNU_TARGET_OS_OSX */
7168 
7169 /*
7170  * We need to export some functions to other components that
7171  * are currently implemented in macros within the osfmk
7172  * component.  Just export them as functions of the same name.
7173  */
7174 boolean_t
is_kerneltask(task_t t)7175 is_kerneltask(task_t t)
7176 {
7177 	if (t == kernel_task) {
7178 		return TRUE;
7179 	}
7180 
7181 	return FALSE;
7182 }
7183 
/* Exported wrapper: is this task a corpse fork? */
boolean_t
is_corpsefork(task_t t)
{
	return task_is_a_corpse_fork(t);
}
7189 
/*
 * Like current_task(), but safe very early in boot: returns TASK_NULL
 * if the current thread's read-only data (t_tro) is not yet set up.
 */
task_t
current_task_early(void)
{
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		if (current_thread()->t_tro == NULL) {
			return TASK_NULL;
		}
	}
	return get_threadtask(current_thread());
}
7200 
/* Return the task of the currently running thread. */
task_t
current_task(void)
{
	return get_threadtask(current_thread());
}
7206 
7207 /* defined in bsd/kern/kern_prot.c */
7208 extern int get_audit_token_pid(audit_token_t *audit_token);
7209 
7210 int
task_pid(task_t task)7211 task_pid(task_t task)
7212 {
7213 	if (task) {
7214 		return get_audit_token_pid(task_get_audit_token(task));
7215 	}
7216 	return -1;
7217 }
7218 
7219 #if __has_feature(ptrauth_calls)
7220 /*
7221  * Get the shared region id and jop signing key for the task.
7222  * The function will allocate a kalloc buffer and return
7223  * it to caller, the caller needs to free it. This is used
7224  * for getting the information via task port.
7225  */
char *
task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
{
	size_t len;
	char *shared_region_id = NULL;

	task_lock(task);
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		return NULL;
	}
	len = strlen(task->shared_region_id) + 1;

	/* don't hold task lock while allocating */
	task_unlock(task);
	shared_region_id = kalloc_data(len, Z_WAITOK);
	task_lock(task);

	/* re-check: the id may have been cleared while the lock was dropped */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		kfree_data(shared_region_id, len);
		return NULL;
	}
	assert(len == strlen(task->shared_region_id) + 1);         /* should never change */
	strlcpy(shared_region_id, task->shared_region_id, len);
	task_unlock(task);

	/* find key from its auth pager */
	if (jop_pid != NULL) {
		*jop_pid = shared_region_find_key(shared_region_id);
	}

	/* Caller owns the returned buffer and must kfree_data() it. */
	return shared_region_id;
}
7260 
7261 /*
7262  * set the shared region id for a task
7263  */
void
task_set_shared_region_id(task_t task, char *id)
{
	char *old_id;

	/*
	 * Swap in the new id (ownership of `id' transfers to the task)
	 * and reset the auth-remapped state, all under the task lock.
	 */
	task_lock(task);
	old_id = task->shared_region_id;
	task->shared_region_id = id;
	task->shared_region_auth_remapped = FALSE;
	task_unlock(task);

	/* free any pre-existing shared region id */
	if (old_id != NULL) {
		shared_region_key_dealloc(old_id);
		kfree_data(old_id, strlen(old_id) + 1);
	}
}
7281 #endif /* __has_feature(ptrauth_calls) */
7282 
7283 /*
7284  * This routine finds a thread in a task by its unique id
7285  * Returns a referenced thread or THREAD_NULL if the thread was not found
7286  *
7287  * TODO: This is super inefficient - it's an O(threads in task) list walk!
7288  *       We should make a tid hash, or transition all tid clients to thread ports
7289  *
7290  * Precondition: No locks held (will take task lock)
7291  */
7292 thread_t
task_findtid(task_t task,uint64_t tid)7293 task_findtid(task_t task, uint64_t tid)
7294 {
7295 	thread_t self           = current_thread();
7296 	thread_t found_thread   = THREAD_NULL;
7297 	thread_t iter_thread    = THREAD_NULL;
7298 
7299 	/* Short-circuit the lookup if we're looking up ourselves */
7300 	if (tid == self->thread_id || tid == TID_NULL) {
7301 		assert(get_threadtask(self) == task);
7302 
7303 		thread_reference(self);
7304 
7305 		return self;
7306 	}
7307 
7308 	task_lock(task);
7309 
7310 	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
7311 		if (iter_thread->thread_id == tid) {
7312 			found_thread = iter_thread;
7313 			thread_reference(found_thread);
7314 			break;
7315 		}
7316 	}
7317 
7318 	task_unlock(task);
7319 
7320 	return found_thread;
7321 }
7322 
7323 int
pid_from_task(task_t task)7324 pid_from_task(task_t task)
7325 {
7326 	int pid = -1;
7327 	void *bsd_info = get_bsdtask_info(task);
7328 
7329 	if (bsd_info) {
7330 		pid = proc_pid(bsd_info);
7331 	} else {
7332 		pid = task_pid(task);
7333 	}
7334 
7335 	return pid;
7336 }
7337 
7338 /*
7339  * Control the CPU usage monitor for a task.
7340  */
7341 kern_return_t
task_cpu_usage_monitor_ctl(task_t task,uint32_t * flags)7342 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7343 {
7344 	int error = KERN_SUCCESS;
7345 
7346 	if (*flags & CPUMON_MAKE_FATAL) {
7347 		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7348 	} else {
7349 		error = KERN_INVALID_ARGUMENT;
7350 	}
7351 
7352 	return error;
7353 }
7354 
7355 /*
7356  * Control the wakeups monitor for a task.
7357  */
/*
 * Query (WAKEMON_GET_PARAMS), enable (WAKEMON_ENABLE) or disable
 * (WAKEMON_DISABLE) the interrupt-wakeups monitor, implemented as a
 * limit + callback on the task's interrupt_wakeups ledger entry.
 */
kern_return_t
task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & WAKEMON_GET_PARAMS) {
		ledger_amount_t limit;
		uint64_t                period;

		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);

		if (limit != LEDGER_LIMIT_INFINITY) {
			/*
			 * An active limit means the wakeups monitor is enabled.
			 */
			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
			*flags = WAKEMON_ENABLE;
			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
				*flags |= WAKEMON_MAKE_FATAL;
			}
		} else {
			*flags = WAKEMON_DISABLE;
			*rate_hz = -1;
		}

		/*
		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	if (*flags & WAKEMON_ENABLE) {
		if (*flags & WAKEMON_SET_DEFAULTS) {
			*rate_hz = task_wakeups_monitor_rate;
		}

#ifndef CONFIG_NOMONITORS
		if (*flags & WAKEMON_MAKE_FATAL) {
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
		}
#endif /* CONFIG_NOMONITORS */

		/* Rate must be positive even when monitors are compiled out. */
		if (*rate_hz <= 0) {
			task_unlock(task);
			return KERN_INVALID_ARGUMENT;
		}

#ifndef CONFIG_NOMONITORS
		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
		    (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
#endif /* CONFIG_NOMONITORS */
	} else if (*flags & WAKEMON_DISABLE) {
		/*
		 * Caller wishes to disable wakeups monitor on the task.
		 *
		 * Disable telemetry if it was triggered by the wakeups monitor, and
		 * remove the limit & callback on the wakeups ledger entry.
		 */
#if CONFIG_TELEMETRY
		telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
#endif
		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
7431 
/*
 * Ledger callback for the interrupt-wakeups monitor.  `warning` is the
 * ledger warning level: LEDGER_WARNING_ROSE_ABOVE on approaching the
 * limit, 0 when the limit itself has been exceeded.
 */
void
task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
		 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
		 */
		telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
#endif

	/* warning == 0 means an outright violation: report it. */
	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
	}
}
7458 
7459 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)7460 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
7461 {
7462 	task_t                      task        = current_task();
7463 	int                         pid         = 0;
7464 	const char                  *procname   = "unknown";
7465 	boolean_t                   fatal;
7466 	kern_return_t               kr;
7467 #ifdef EXC_RESOURCE_MONITORS
7468 	mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
7469 #endif /* EXC_RESOURCE_MONITORS */
7470 	struct ledger_entry_info    lei;
7471 
7472 #ifdef MACH_BSD
7473 	pid = proc_selfpid();
7474 	if (get_bsdtask_info(task) != NULL) {
7475 		procname = proc_name_address(get_bsdtask_info(current_task()));
7476 	}
7477 #endif
7478 
7479 	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
7480 
7481 	/*
7482 	 * Disable the exception notification so we don't overwhelm
7483 	 * the listener with an endless stream of redundant exceptions.
7484 	 * TODO: detect whether another thread is already reporting the violation.
7485 	 */
7486 	uint32_t flags = WAKEMON_DISABLE;
7487 	task_wakeups_monitor_ctl(task, &flags, NULL);
7488 
7489 	fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7490 	trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
7491 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
7492 	    "over ~%llu seconds, averaging %llu wakes / second and "
7493 	    "violating a %slimit of %llu wakes over %llu seconds.\n",
7494 	    procname, pid,
7495 	    lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
7496 	    lei.lei_last_refill == 0 ? 0 :
7497 	    (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
7498 	    fatal ? "FATAL " : "",
7499 	    lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
7500 
7501 	kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
7502 	    fatal ? kRNFatalLimitFlag : 0);
7503 	if (kr) {
7504 		printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
7505 	}
7506 
7507 #ifdef EXC_RESOURCE_MONITORS
7508 	if (disable_exc_resource) {
7509 		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7510 		    "supressed by a boot-arg\n", procname, pid);
7511 		return;
7512 	}
7513 	if (audio_active) {
7514 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7515 		    "supressed due to audio playback\n", procname, pid);
7516 		return;
7517 	}
7518 	if (lei.lei_last_refill == 0) {
7519 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7520 		    "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
7521 	}
7522 
7523 	code[0] = code[1] = 0;
7524 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
7525 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
7526 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
7527 	    NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
7528 	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
7529 	    lei.lei_last_refill);
7530 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
7531 	    NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
7532 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7533 #endif /* EXC_RESOURCE_MONITORS */
7534 
7535 	if (fatal) {
7536 		task_terminate_internal(task);
7537 	}
7538 }
7539 
7540 static boolean_t
global_update_logical_writes(int64_t io_delta,int64_t * global_write_count)7541 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
7542 {
7543 	int64_t old_count, new_count;
7544 	boolean_t needs_telemetry;
7545 
7546 	do {
7547 		new_count = old_count = *global_write_count;
7548 		new_count += io_delta;
7549 		if (new_count >= io_telemetry_limit) {
7550 			new_count = 0;
7551 			needs_telemetry = TRUE;
7552 		} else {
7553 			needs_telemetry = FALSE;
7554 		}
7555 	} while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
7556 	return needs_telemetry;
7557 }
7558 
/*
 * Account io_size bytes of physical filesystem-metadata writes against
 * the task's ledger and its task_fs_metadata_writes counter.  `flags`
 * selects credit vs. debit.  Compiled out unless CONFIG_PHYS_WRITE_ACCT.
 */
void
task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
{
#if CONFIG_PHYS_WRITE_ACCT
	if (!io_size) {
		return;
	}

	/*
	 * task == NULL means that we have to update kernel_task ledgers
	 */
	if (!task) {
		task = kernel_task;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
	    task_pid(task), flavor, io_size, flags, 0);
	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);

	/* Only the METADATA flavor is accounted; other flavors are ignored here. */
	if (flags & TASK_BALANCE_CREDIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	} else if (flags & TASK_BALANCE_DEBIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			/* Debit mirrors the credit path: counter down, ledger debited. */
			OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	}
#endif /* CONFIG_PHYS_WRITE_ACCT */
}
7591 
/*
 * Account io_size bytes of logical writes against the storage device
 * backing vnode `vp`: per-task counters, the task ledger, the coalition
 * I/O ledger (internal storage only), and the global counter that
 * drives I/O telemetry.  `flags` is one of the TASK_WRITE_* kinds.
 */
void
task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
{
	int64_t io_delta = 0;
	int64_t * global_counter_to_update;
	boolean_t needs_telemetry = FALSE;
	boolean_t is_external_device = FALSE;
	int ledger_to_update = 0;
	struct task_writes_counters * writes_counters_to_update;

	if ((!task) || (!io_size) || (!vp)) {
		return;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
	    task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
	DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);

	// Is the drive backing this vnode internal or external to the system?
	if (vnode_isonexternalstorage(vp) == false) {
		global_counter_to_update = &global_logical_writes_count;
		ledger_to_update = task_ledgers.logical_writes;
		writes_counters_to_update = &task->task_writes_counters_internal;
		is_external_device = FALSE;
	} else {
		global_counter_to_update = &global_logical_writes_to_external_count;
		ledger_to_update = task_ledgers.logical_writes_to_external;
		writes_counters_to_update = &task->task_writes_counters_external;
		is_external_device = TRUE;
	}

	/* Immediate/deferred/metadata writes are credits; invalidated writes are debits. */
	switch (flags) {
	case TASK_WRITE_IMMEDIATE:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	case TASK_WRITE_DEFERRED:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	case TASK_WRITE_INVALIDATED:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
		ledger_debit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
		}
		break;
	case TASK_WRITE_METADATA:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	}

	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
	if (io_telemetry_limit != 0) {
		/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
		needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
		if (needs_telemetry && !is_external_device) {
			act_set_io_telemetry_ast(current_thread());
		}
	}
}
7663 
7664 /*
7665  * Control the I/O monitor for a task.
7666  */
7667 kern_return_t
task_io_monitor_ctl(task_t task,uint32_t * flags)7668 task_io_monitor_ctl(task_t task, uint32_t *flags)
7669 {
7670 	ledger_t ledger = task->ledger;
7671 
7672 	task_lock(task);
7673 	if (*flags & IOMON_ENABLE) {
7674 		/* Configure the physical I/O ledger */
7675 		ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
7676 		ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
7677 	} else if (*flags & IOMON_DISABLE) {
7678 		/*
7679 		 * Caller wishes to disable I/O monitor on the task.
7680 		 */
7681 		ledger_disable_refill(ledger, task_ledgers.physical_writes);
7682 		ledger_disable_callback(ledger, task_ledgers.physical_writes);
7683 	}
7684 
7685 	task_unlock(task);
7686 	return KERN_SUCCESS;
7687 }
7688 
7689 void
task_io_rate_exceeded(int warning,const void * param0,__unused const void * param1)7690 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
7691 {
7692 	if (warning == 0) {
7693 		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
7694 	}
7695 }
7696 
/*
 * Resource-violation handler for the per-task I/O monitor.  Runs on a
 * thread of the offending task: snapshots the relevant ledger, disables
 * the monitor to avoid repeat notifications, logs, notifies listeners,
 * and raises EXC_RESOURCE where configured.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
{
	int                             pid = 0;
	task_t                          task = current_task();
#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info        lei = {};
	kern_return_t                   kr;

#ifdef MACH_BSD
	pid = proc_selfpid();
#endif
	/*
	 * Get the ledger entry info. We need to do this before disabling the exception
	 * to get correct values for all fields.
	 */
	/* Note: only the PHYSICAL_WRITES flavor fills lei; otherwise it stays zeroed. */
	switch (flavor) {
	case FLAVOR_IO_PHYSICAL_WRITES:
		ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
		break;
	}


	/*
	 * Disable the exception notification so we don't overwhelm
	 * the listener with an endless stream of redundant exceptions.
	 * TODO: detect whether another thread is already reporting the violation.
	 */
	uint32_t flags = IOMON_DISABLE;
	task_io_monitor_ctl(task, &flags);

	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
	}
	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
	    pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));

	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
	if (kr) {
		printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	/* Encode interval (secs), limit (MB) and observed (MB) into the exception codes. */
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
	EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
#endif /* EXC_RESOURCE_MONITORS */
}
7751 
7752 void
task_port_space_ast(__unused task_t task)7753 task_port_space_ast(__unused task_t task)
7754 {
7755 	uint32_t current_size, soft_limit, hard_limit;
7756 	assert(task == current_task());
7757 	kern_return_t ret = ipc_space_get_table_size_and_limits(task->itk_space,
7758 	    &current_size, &soft_limit, &hard_limit);
7759 	if (ret == KERN_SUCCESS) {
7760 		SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
7761 	}
7762 }
7763 
7764 #if CONFIG_PROC_RESOURCE_LIMITS
7765 static mach_port_t
task_allocate_fatal_port(void)7766 task_allocate_fatal_port(void)
7767 {
7768 	mach_port_t task_fatal_port = MACH_PORT_NULL;
7769 	task_id_token_t token;
7770 
7771 	kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
7772 	if (kr) {
7773 		return MACH_PORT_NULL;
7774 	}
7775 	task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
7776 	    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
7777 
7778 	task_id_token_set_port(token, task_fatal_port);
7779 
7780 	return task_fatal_port;
7781 }
7782 
/*
 * No-senders notification for a task-fatal port: once the last send
 * right is gone, resolve the identity token back to the task and kill
 * its BSD process.
 */
static void
task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	task_t task = TASK_NULL;
	kern_return_t kr;

	task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);

	assert(token != NULL);
	if (token) {
		kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
		if (task) {
			task_bsdtask_kill(task);
			task_deallocate(task);
		}
		task_id_token_release(token); /* consumes ref given by notification */
	}
}
7801 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7802 
/*
 * Notification path for a task whose port table hit a limit.  With both
 * limits zero the system-wide table cap was hit and the process is
 * aborted with an EXC_RESOURCE code; otherwise (with
 * CONFIG_PROC_RESOURCE_LIMITS) a soft/hard-limit resource violation is
 * sent, accompanied by a fatal port on the hard-limit path.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	__unused kern_return_t kr;
	__unused resource_notify_flags_t flags = kRNFlagsNone;
	__unused uint32_t limit;
	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}
#endif
	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
	    Num of ports allocated %u; \n", procname, pid, current_size);

	/* Abort the process if it has hit the system-wide limit for ipc port table size */
	if (!hard_limit && !soft_limit) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
		EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);

		exit_with_port_space_exception(current_proc(), code[0], code[1]);

		return;
	}

#if CONFIG_PROC_RESOURCE_LIMITS
	/* Hard limit: attach a fatal port so the notified agent can kill the task. */
	if (hard_limit > 0) {
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			/* Could not create the fatal port: kill the task directly. */
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
	}
	/* Drop the local send right; the violation message holds its own. */
	if (task_fatal_port) {
		ipc_port_release_send(task_fatal_port);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
7866 
/*
 * AST handler for a task that crossed a file-descriptor limit; forwards
 * to the notification routine.  No-op unless CONFIG_PROC_RESOURCE_LIMITS.
 */
void
task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
{
#if CONFIG_PROC_RESOURCE_LIMITS
	assert(task == current_task());
	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
7875 
7876 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Notification path for a task that crossed its file-descriptor soft or
 * hard limit: logs the event and sends a resource violation, attaching
 * a fatal port on the hard-limit path (mirrors the mach-ports handler).
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	kern_return_t kr;
	resource_notify_flags_t flags = kRNFlagsNone;
	int limit;
	mach_port_t task_fatal_port = MACH_PORT_NULL;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}
#endif
	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
	    Num of fds allocated %u; \n", procname, pid, current_size);

	/* Hard limit: attach a fatal port so the notified agent can kill the task. */
	if (hard_limit > 0) {
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			/* Could not create the fatal port: kill the task directly. */
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
	}
	/* Drop the local send right; the violation message holds its own. */
	if (task_fatal_port) {
		ipc_port_release_send(task_fatal_port);
	}
}
7925 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7926 
7927 /* Placeholders for the task set/get voucher interfaces */
7928 kern_return_t
task_get_mach_voucher(task_t task,mach_voucher_selector_t __unused which,ipc_voucher_t * voucher)7929 task_get_mach_voucher(
7930 	task_t                  task,
7931 	mach_voucher_selector_t __unused which,
7932 	ipc_voucher_t           *voucher)
7933 {
7934 	if (TASK_NULL == task) {
7935 		return KERN_INVALID_TASK;
7936 	}
7937 
7938 	*voucher = NULL;
7939 	return KERN_SUCCESS;
7940 }
7941 
7942 kern_return_t
task_set_mach_voucher(task_t task,ipc_voucher_t __unused voucher)7943 task_set_mach_voucher(
7944 	task_t                  task,
7945 	ipc_voucher_t           __unused voucher)
7946 {
7947 	if (TASK_NULL == task) {
7948 		return KERN_INVALID_TASK;
7949 	}
7950 
7951 	return KERN_SUCCESS;
7952 }
7953 
/*
 * Unsupported placeholder.  Always returns KERN_NOT_SUPPORTED, but must
 * first release the caller-held voucher reference (see comment below).
 */
kern_return_t
task_swap_mach_voucher(
	__unused task_t         task,
	__unused ipc_voucher_t  new_voucher,
	ipc_voucher_t          *in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
}
7969 
7970 void
task_set_gpu_denied(task_t task,boolean_t denied)7971 task_set_gpu_denied(task_t task, boolean_t denied)
7972 {
7973 	task_lock(task);
7974 
7975 	if (denied) {
7976 		task->t_flags |= TF_GPU_DENIED;
7977 	} else {
7978 		task->t_flags &= ~TF_GPU_DENIED;
7979 	}
7980 
7981 	task_unlock(task);
7982 }
7983 
7984 boolean_t
task_is_gpu_denied(task_t task)7985 task_is_gpu_denied(task_t task)
7986 {
7987 	/* We don't need the lock to read this flag */
7988 	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
7989 }
7990 
7991 
7992 uint64_t
get_task_memory_region_count(task_t task)7993 get_task_memory_region_count(task_t task)
7994 {
7995 	vm_map_t map;
7996 	map = (task == kernel_task) ? kernel_map: task->map;
7997 	return (uint64_t)get_map_nentries(map);
7998 }
7999 
/*
 * Emit kdebug events describing one dyld image (UUID, load address,
 * fsid, fsobjid).  The record is wider than a single kdebug event, so
 * it is split across 2 events on LP64 and 3 events on ILP32, at
 * consecutive debugids starting from base_code.
 */
static void
kdebug_trace_dyld_internal(uint32_t base_code,
    struct dyld_kernel_image_info *info)
{
	static_assert(sizeof(info->uuid) >= 16);

#if defined(__LP64__)
	/* View the 16-byte UUID as two 64-bit halves for the event payload. */
	uint64_t *uuid = (uint64_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
	    uuid[1], info->load_addr,
	    (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
	    0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
	    (uint64_t)info->fsobjid.fid_objno |
	    ((uint64_t)info->fsobjid.fid_generation << 32),
	    0, 0, 0, 0);
#else /* defined(__LP64__) */
	/* 32-bit event arguments: UUID as four 32-bit words, record split over 3 events. */
	uint32_t *uuid = (uint32_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
	    uuid[1], uuid[2], uuid[3], 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
	    (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
	    info->fsobjid.fid_objno, 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
	    info->fsobjid.fid_generation, 0, 0, 0, 0);
#endif /* !defined(__LP64__) */
}
8034 
/*
 * Copy out an array of dyld image infos supplied by user space (as a
 * vm_map_copy_t) and emit a kdebug event group for each entry.
 * Consumes infos_copy on the paths noted below.
 */
static kern_return_t
kdebug_trace_dyld(task_t task, uint32_t base_code,
    vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
{
	kern_return_t kr;
	dyld_kernel_image_info_array_t infos;
	vm_map_offset_t map_data;
	vm_offset_t data;

	if (!infos_copy) {
		return KERN_INVALID_ADDRESS;
	}

	/* Tracing disabled: drop the copy and report success. */
	if (!kdebug_enable ||
	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
		vm_map_copy_discard(infos_copy);
		return KERN_SUCCESS;
	}

	/*
	 * NOTE(review): this error path returns without discarding
	 * infos_copy — presumably the MIG/IPC layer destroys the request
	 * (and its OOL memory) on a failure return; confirm against the
	 * callers' MIG conventions.
	 */
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	/* Map the copied-in data into the ipc_kernel_map to walk it. */
	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);

	for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
		kdebug_trace_dyld_internal(base_code, &(infos[i]));
	}

	/* Tear down the kernel mapping created by vm_map_copyout(). */
	data = CAST_DOWN(vm_offset_t, map_data);
	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
	return KERN_SUCCESS;
}
8073 
/*
 * MIG entry: trace newly mapped dyld images (DBG_DYLD_UUID_MAP_A events).
 */
kern_return_t
task_register_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8082 
/*
 * MIG entry: trace unmapped dyld images (DBG_DYLD_UUID_UNMAP_A events).
 */
kern_return_t
task_unregister_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8091 
/* Unimplemented MIG entry; querying image infos is not supported. */
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
	return KERN_NOT_SUPPORTED;
}
8099 
/*
 * MIG entry: trace the dyld shared-cache image for the current task.
 * cache_img is passed by value, so it is traced unconditionally (no
 * kdebug_enable check here, unlike kdebug_trace_dyld()).
 */
kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,
    dyld_kernel_image_info_t cache_img,
    __unused boolean_t no_cache,
    __unused boolean_t private_cache)
{
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
	return KERN_SUCCESS;
}
8113 
/* Unimplemented MIG entry; setting dyld state is not supported. */
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
	return KERN_NOT_SUPPORTED;
}
8120 
/* Unimplemented MIG entry; querying dyld process state is not supported. */
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
	return KERN_NOT_SUPPORTED;
}
8127 
/*
 * Return inspection data for a task.  Only TASK_INSPECT_BASIC_COUNTS
 * (lifetime instruction and cycle counts from recount) is implemented,
 * and only when the kernel is built with CONFIG_PERVASIVE_CPI.
 * size_in_out is in/out: capacity on entry, elements written on return.
 */
kern_return_t
task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
    task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
{
#if CONFIG_PERVASIVE_CPI
	task_t task = (task_t)task_insp;
	kern_return_t kr = KERN_SUCCESS;
	mach_msg_type_number_t size;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	size = *size_in_out;

	switch (flavor) {
	case TASK_INSPECT_BASIC_COUNTS: {
		struct task_inspect_basic_counts *bc =
		    (struct task_inspect_basic_counts *)info_out;
		struct recount_usage stats = { 0 };
		/* Caller's buffer must be large enough for the flavor. */
		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

		recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
		bc->instructions = stats.ru_instructions;
		bc->cycles = stats.ru_cycles;
		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
		break;
	}
	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	/* Only report a size back to the caller on success. */
	if (kr == KERN_SUCCESS) {
		*size_in_out = size;
	}
	return kr;
#else /* CONFIG_PERVASIVE_CPI */
#pragma unused(task_insp, flavor, info_out, size_in_out)
	return KERN_NOT_SUPPORTED;
#endif /* !CONFIG_PERVASIVE_CPI */
}
8173 
8174 #if CONFIG_SECLUDED_MEMORY
/* Count of tasks currently allowed to allocate from secluded memory
 * (maintained atomically in task_set_can_use_secluded_mem_locked()). */
int num_tasks_can_use_secluded_mem = 0;
8176 
/*
 * Grant or revoke a task's ability to use secluded memory, taking the
 * task lock.  No-op unless the task was first marked as eligible via
 * task_set_could_use_secluded_mem().
 */
void
task_set_can_use_secluded_mem(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	if (!task->task_could_use_secluded_mem) {
		return;
	}
	task_lock(task);
	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
	task_unlock(task);
}
8189 
/*
 * Locked helper: flip task_can_use_secluded_mem while keeping the
 * global num_tasks_can_use_secluded_mem count in sync.  Caller holds
 * the task lock and the task must already be eligible.
 */
void
task_set_can_use_secluded_mem_locked(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	assert(task->task_could_use_secluded_mem);
	/* Enable only if the secluded_for_apps boot-arg allows it and it isn't set yet. */
	if (can_use_secluded_mem &&
	    secluded_for_apps &&         /* global boot-arg */
	    !task->task_can_use_secluded_mem) {
		assert(num_tasks_can_use_secluded_mem >= 0);
		OSAddAtomic(+1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = TRUE;
	} else if (!can_use_secluded_mem &&
	    task->task_can_use_secluded_mem) {
		assert(num_tasks_can_use_secluded_mem > 0);
		OSAddAtomic(-1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = FALSE;
	}
}
8211 
/* Mark the task as eligible (or not) to ever use secluded memory. */
void
task_set_could_use_secluded_mem(
	task_t          task,
	boolean_t       could_use_secluded_mem)
{
	task->task_could_use_secluded_mem = !!could_use_secluded_mem;
}
8219 
8220 void
task_set_could_also_use_secluded_mem(task_t task,boolean_t could_also_use_secluded_mem)8221 task_set_could_also_use_secluded_mem(
8222 	task_t          task,
8223 	boolean_t       could_also_use_secluded_mem)
8224 {
8225 	task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
8226 }
8227 
8228 boolean_t
task_can_use_secluded_mem(task_t task,boolean_t is_alloc)8229 task_can_use_secluded_mem(
8230 	task_t          task,
8231 	boolean_t       is_alloc)
8232 {
8233 	if (task->task_can_use_secluded_mem) {
8234 		assert(task->task_could_use_secluded_mem);
8235 		assert(num_tasks_can_use_secluded_mem > 0);
8236 		return TRUE;
8237 	}
8238 	if (task->task_could_also_use_secluded_mem &&
8239 	    num_tasks_can_use_secluded_mem > 0) {
8240 		assert(num_tasks_can_use_secluded_mem > 0);
8241 		return TRUE;
8242 	}
8243 
8244 	/*
8245 	 * If a single task is using more than some large amount of
8246 	 * memory (i.e. secluded_shutoff_trigger) and is approaching
8247 	 * its task limit, allow it to dip into secluded and begin
8248 	 * suppression of rebuilding secluded memory until that task exits.
8249 	 */
8250 	if (is_alloc && secluded_shutoff_trigger != 0) {
8251 		uint64_t phys_used = get_task_phys_footprint(task);
8252 		uint64_t limit = get_task_phys_footprint_limit(task);
8253 		if (phys_used > secluded_shutoff_trigger &&
8254 		    limit > secluded_shutoff_trigger &&
8255 		    phys_used > limit - secluded_shutoff_headroom) {
8256 			start_secluded_suppression(task);
8257 			return TRUE;
8258 		}
8259 	}
8260 
8261 	return FALSE;
8262 }
8263 
/* Returns whether this task was marked as able to use secluded memory. */
boolean_t
task_could_use_secluded_mem(
	task_t  task)
{
	return task->task_could_use_secluded_mem;
}
8270 
/* Returns whether this task carries the "could also use secluded" mark. */
boolean_t
task_could_also_use_secluded_mem(
	task_t  task)
{
	return task->task_could_also_use_secluded_mem;
}
8277 #endif /* CONFIG_SECLUDED_MEMORY */
8278 
/* Returns the head of the task's IOKit user-client queue. */
queue_head_t *
task_io_user_clients(task_t task)
{
	return &task->io_user_clients;
}
8284 
/* Record the task's message_app_suspended flag (plain store, no locking). */
void
task_set_message_app_suspended(task_t task, boolean_t enable)
{
	task->message_app_suspended = enable;
}
8290 
/*
 * Copy the task fields that survive across exec from the old task to
 * the new one; currently only the virtual-timers setting.
 */
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
	dst_task->vtimers = src_task->vtimers;
}
8296 
8297 #if DEVELOPMENT || DEBUG
8298 int vm_region_footprint = 0;
8299 #endif /* DEVELOPMENT || DEBUG */
8300 
/*
 * Returns whether the current task should report VM regions using the
 * "footprint" accounting view. DEVELOPMENT/DEBUG kernels honor the
 * vm_region_footprint system-wide override first.
 */
boolean_t
task_self_region_footprint(void)
{
#if DEVELOPMENT || DEBUG
	if (vm_region_footprint) {
		/* system-wide override */
		return TRUE;
	}
#endif /* DEVELOPMENT || DEBUG */
	return current_task()->task_region_footprint;
}
8312 
8313 void
task_self_region_footprint_set(boolean_t newval)8314 task_self_region_footprint_set(
8315 	boolean_t newval)
8316 {
8317 	task_t  curtask;
8318 
8319 	curtask = current_task();
8320 	task_lock(curtask);
8321 	if (newval) {
8322 		curtask->task_region_footprint = TRUE;
8323 	} else {
8324 		curtask->task_region_footprint = FALSE;
8325 	}
8326 	task_unlock(curtask);
8327 }
8328 
/* Set or clear TF_DARKWAKE_MODE on the task, under the task lock. */
void
task_set_darkwake_mode(task_t task, boolean_t set_mode)
{
	assert(task);

	task_lock(task);

	if (set_mode) {
		task->t_flags |= TF_DARKWAKE_MODE;
	} else {
		task->t_flags &= ~(TF_DARKWAKE_MODE);
	}

	task_unlock(task);
}
8344 
/* Returns TRUE if TF_DARKWAKE_MODE is set (unlocked read of t_flags). */
boolean_t
task_get_darkwake_mode(task_t task)
{
	assert(task);
	return (task->t_flags & TF_DARKWAKE_MODE) != 0;
}
8351 
8352 /*
8353  * Set default behavior for task's control port and EXC_GUARD variants that have
8354  * settable behavior.
8355  *
8356  * Platform binaries typically have one behavior, third parties another -
8357  * but there are special exception we may need to account for.
8358  */
8359 void
task_set_exc_guard_ctrl_port_default(task_t task,thread_t main_thread,const char * name,unsigned int namelen,boolean_t is_simulated,uint32_t platform,uint32_t sdk)8360 task_set_exc_guard_ctrl_port_default(
8361 	task_t task,
8362 	thread_t main_thread,
8363 	const char *name,
8364 	unsigned int namelen,
8365 	boolean_t is_simulated,
8366 	uint32_t platform,
8367 	uint32_t sdk)
8368 {
8369 	task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;
8370 
8371 	if (task_get_platform_binary(task)) {
8372 		/* set exc guard default behavior for first-party code */
8373 		task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
8374 
8375 		if (1 == task_pid(task)) {
8376 			/* special flags for inittask - delivery every instance as corpse */
8377 			task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
8378 		} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
8379 			/* honor by-name default setting overrides */
8380 
8381 			int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);
8382 
8383 			for (int i = 0; i < count; i++) {
8384 				const struct task_exc_guard_named_default *named_default =
8385 				    &task_exc_guard_named_defaults[i];
8386 				if (strncmp(named_default->name, name, namelen) == 0 &&
8387 				    strlen(named_default->name) == namelen) {
8388 					task->task_exc_guard = named_default->behavior;
8389 					break;
8390 				}
8391 			}
8392 		}
8393 
8394 		/* set control port options for 1p code, inherited from parent task by default */
8395 		opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
8396 	} else {
8397 		/* set exc guard default behavior for third-party code */
8398 		task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
8399 		/* set control port options for 3p code, inherited from parent task by default */
8400 		opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
8401 	}
8402 
8403 	if (is_simulated) {
8404 		/* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
8405 		if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
8406 		    (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
8407 		    (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
8408 			task->task_exc_guard = TASK_EXC_GUARD_NONE;
8409 		}
8410 		/* Disable protection for control ports for simulated binaries */
8411 		opts = TASK_CONTROL_PORT_OPTIONS_NONE;
8412 	}
8413 
8414 
8415 	task_set_control_port_options(task, opts);
8416 
8417 	task_set_immovable_pinned(task);
8418 	main_thread_set_immovable_pinned(main_thread);
8419 }
8420 
/*
 * Fetch the task's current EXC_GUARD delivery behavior into *behaviorp.
 * Returns KERN_INVALID_TASK for a NULL task.
 */
kern_return_t
task_get_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t *behaviorp)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	*behaviorp = task->task_exc_guard;
	return KERN_SUCCESS;
}
8432 
/*
 * Update the task's EXC_GUARD delivery behavior.
 *
 * Returns KERN_INVALID_TASK for a NULL task, KERN_INVALID_VALUE for
 * out-of-range behavior bits, and (on release kernels) KERN_DENIED
 * when the request would clear a no-unset bit or set a no-set bit.
 */
kern_return_t
task_set_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t new_behavior)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
		return KERN_INVALID_VALUE;
	}

	/* limit setting to that allowed for this config */
	new_behavior = new_behavior & task_exc_guard_config_mask;

#if !defined (DEBUG) && !defined (DEVELOPMENT)
	/* On release kernels, only allow _upgrading_ exc guard behavior */
	task_exc_guard_behavior_t cur_behavior;

	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
		/* deny clearing any currently-set bit in the no-unset mask */
		if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* deny setting any currently-clear bit in the no-set mask */
		if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* no restrictions on CORPSE bit */
	});
#else
	task->task_exc_guard = new_behavior;
#endif
	return KERN_SUCCESS;
}
8468 
/*
 * Enable/disable corpse forking for the task (TF_NO_CORPSE_FORKING).
 * Only supported on DEVELOPMENT/DEBUG kernels; returns
 * KERN_NOT_SUPPORTED elsewhere.
 */
kern_return_t
task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
{
#if DEVELOPMENT || DEBUG
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}

	task_lock(task);
	if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
		task->t_flags |= TF_NO_CORPSE_FORKING;
	} else {
		task->t_flags &= ~TF_NO_CORPSE_FORKING;
	}
	task_unlock(task);

	return KERN_SUCCESS;
#else
	(void)task;
	(void)behavior;
	return KERN_NOT_SUPPORTED;
#endif
}
8492 
8493 boolean_t
task_corpse_forking_disabled(task_t task)8494 task_corpse_forking_disabled(task_t task)
8495 {
8496 	boolean_t disabled = FALSE;
8497 
8498 	task_lock(task);
8499 	disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
8500 	task_unlock(task);
8501 
8502 	return disabled;
8503 }
8504 
8505 #if __arm64__
8506 extern int legacy_footprint_entitlement_mode;
8507 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
8508 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
8509 
8510 
8511 void
task_set_legacy_footprint(task_t task)8512 task_set_legacy_footprint(
8513 	task_t task)
8514 {
8515 	task_lock(task);
8516 	task->task_legacy_footprint = TRUE;
8517 	task_unlock(task);
8518 }
8519 
/*
 * Grant the task its extra footprint limit (legacy footprint
 * entitlement). Idempotent: double-checked so the memorystatus action
 * runs at most once per task.
 */
void
task_set_extra_footprint_limit(
	task_t task)
{
	/* unlocked fast path: already granted */
	if (task->task_extra_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check under the lock in case another thread won the race */
	if (task->task_extra_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_extra_footprint_limit = TRUE;
	task_unlock(task);
	memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
}
8536 
/*
 * Grant the task its iOS 13 "extended footprint" limit. Idempotent:
 * double-checked so the memorystatus action runs at most once per task.
 */
void
task_set_ios13extended_footprint_limit(
	task_t task)
{
	/* unlocked fast path: already granted */
	if (task->task_ios13extended_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check under the lock in case another thread won the race */
	if (task->task_ios13extended_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_ios13extended_footprint_limit = TRUE;
	task_unlock(task);
	memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
}
8553 #endif /* __arm64__ */
8554 
8555 static inline ledger_amount_t
task_ledger_get_balance(ledger_t ledger,int ledger_idx)8556 task_ledger_get_balance(
8557 	ledger_t        ledger,
8558 	int             ledger_idx)
8559 {
8560 	ledger_amount_t amount;
8561 	amount = 0;
8562 	ledger_get_balance(ledger, ledger_idx, &amount);
8563 	return amount;
8564 }
8565 
8566 /*
8567  * Gather the amount of memory counted in a task's footprint due to
8568  * being in a specific set of ledgers.
8569  */
8570 void
task_ledgers_footprint(ledger_t ledger,ledger_amount_t * ledger_resident,ledger_amount_t * ledger_compressed)8571 task_ledgers_footprint(
8572 	ledger_t        ledger,
8573 	ledger_amount_t *ledger_resident,
8574 	ledger_amount_t *ledger_compressed)
8575 {
8576 	*ledger_resident = 0;
8577 	*ledger_compressed = 0;
8578 
8579 	/* purgeable non-volatile memory */
8580 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
8581 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
8582 
8583 	/* "default" tagged memory */
8584 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
8585 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
8586 
8587 	/* "network" currently never counts in the footprint... */
8588 
8589 	/* "media" tagged memory */
8590 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
8591 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
8592 
8593 	/* "graphics" tagged memory */
8594 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
8595 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
8596 
8597 	/* "neural" tagged memory */
8598 	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
8599 	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
8600 }
8601 
8602 #if CONFIG_MEMORYSTATUS
8603 /*
8604  * Credit any outstanding task dirty time to the ledger.
8605  * memstat_dirty_start is pushed forward to prevent any possibility of double
8606  * counting, making it safe to call this as often as necessary to ensure that
8607  * anyone reading the ledger gets up-to-date information.
8608  */
8609 void
task_ledger_settle_dirty_time(task_t t)8610 task_ledger_settle_dirty_time(task_t t)
8611 {
8612 	task_lock(t);
8613 
8614 	uint64_t start = t->memstat_dirty_start;
8615 	if (start) {
8616 		uint64_t now = mach_absolute_time();
8617 
8618 		uint64_t duration;
8619 		absolutetime_to_nanoseconds(now - start, &duration);
8620 
8621 		ledger_t ledger = get_task_ledger(t);
8622 		ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);
8623 
8624 		t->memstat_dirty_start = now;
8625 	}
8626 
8627 	task_unlock(t);
8628 }
8629 #endif /* CONFIG_MEMORYSTATUS */
8630 
8631 void
task_set_memory_ownership_transfer(task_t task,boolean_t value)8632 task_set_memory_ownership_transfer(
8633 	task_t    task,
8634 	boolean_t value)
8635 {
8636 	task_lock(task);
8637 	task->task_can_transfer_memory_ownership = !!value;
8638 	task_unlock(task);
8639 }
8640 
8641 #if DEVELOPMENT || DEBUG
8642 
/* Set/clear the debug-only "exclude from footprint" flag, under the task lock. */
void
task_set_no_footprint_for_debug(task_t task, boolean_t value)
{
	task_lock(task);
	task->task_no_footprint_for_debug = !!value;
	task_unlock(task);
}
8650 
/* Returns the debug-only "exclude from footprint" flag (unlocked read). */
int
task_get_no_footprint_for_debug(task_t task)
{
	return task->task_no_footprint_for_debug;
}
8656 
8657 #endif /* DEVELOPMENT || DEBUG */
8658 
/*
 * Copy per-object query data for the task's owned VM objects.
 *
 * query: output array, or NULL to only count objects
 * len:   size of the output buffer in bytes
 * num:   out - number of entries written (or, when query is NULL,
 *        the task's owned-object count)
 */
void
task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
{
	vm_object_t find_vmo;
	size_t size = 0;

	task_objq_lock(task);
	if (query != NULL) {
		queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
		{
			vm_object_query_t p = &query[size++];

			/* make sure to not overrun */
			if (size * sizeof(vm_object_query_data_t) > len) {
				--size;
				break;
			}

			bzero(p, sizeof(*p));
			p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
			p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
			p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
			p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
			p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
			p->vo_no_footprint = find_vmo->vo_no_footprint;
			p->vo_ledger_tag = find_vmo->vo_ledger_tag;
			p->purgable = find_vmo->purgable;

			/* compressed pages only exist once a pager has been created */
			if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
				p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
			} else {
				p->compressed_size = 0;
			}
		}
	} else {
		size = (size_t)task->task_owned_objects;
	}
	task_objq_unlock(task);

	*num = size;
}
8700 
/*
 * Copy the task's owned-VM-object query data into `buffer`.
 *
 * With buffer_size == 0, this is a sizing call: *entries is set to the
 * object count and *output_size to the number of bytes a subsequent
 * call must provide (0 if there are no objects). Otherwise the buffer
 * is filled and *output_size reports the bytes actually used.
 */
void
task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
{
	assert(output_size);
	assert(entries);

	/* copy the vmobjects and vmobject data out of the task */
	if (buffer_size == 0) {
		task_copy_vmobjects(task, NULL, 0, entries);
		*output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
	} else {
		assert(buffer);
		task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
		buffer->entries = (uint64_t)*entries;
		*output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
	}
}
8718 
8719 void
task_store_owned_vmobject_info(task_t to_task,task_t from_task)8720 task_store_owned_vmobject_info(task_t to_task, task_t from_task)
8721 {
8722 	size_t buffer_size;
8723 	vmobject_list_output_t buffer;
8724 	size_t output_size;
8725 	size_t entries;
8726 
8727 	assert(to_task != from_task);
8728 
8729 	/* get the size, allocate a bufferr, and populate */
8730 	entries = 0;
8731 	output_size = 0;
8732 	task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);
8733 
8734 	if (output_size) {
8735 		buffer_size = output_size;
8736 		buffer = kalloc_data(buffer_size, Z_WAITOK);
8737 
8738 		if (buffer) {
8739 			entries = 0;
8740 			output_size = 0;
8741 
8742 			task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);
8743 
8744 			if (entries) {
8745 				to_task->corpse_vmobject_list = buffer;
8746 				to_task->corpse_vmobject_list_size = buffer_size;
8747 			}
8748 		}
8749 	}
8750 }
8751 
8752 void
task_set_filter_msg_flag(task_t task,boolean_t flag)8753 task_set_filter_msg_flag(
8754 	task_t task,
8755 	boolean_t flag)
8756 {
8757 	assert(task != TASK_NULL);
8758 
8759 	if (flag) {
8760 		task_ro_flags_set(task, TFRO_FILTER_MSG);
8761 	} else {
8762 		task_ro_flags_clear(task, TFRO_FILTER_MSG);
8763 	}
8764 }
8765 
8766 boolean_t
task_get_filter_msg_flag(task_t task)8767 task_get_filter_msg_flag(
8768 	task_t task)
8769 {
8770 	if (!task) {
8771 		return false;
8772 	}
8773 
8774 	return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
8775 }
8776 bool
task_is_exotic(task_t task)8777 task_is_exotic(
8778 	task_t task)
8779 {
8780 	if (task == TASK_NULL) {
8781 		return false;
8782 	}
8783 	return vm_map_is_exotic(get_task_map(task));
8784 }
8785 
8786 bool
task_is_alien(task_t task)8787 task_is_alien(
8788 	task_t task)
8789 {
8790 	if (task == TASK_NULL) {
8791 		return false;
8792 	}
8793 	return vm_map_is_alien(get_task_map(task));
8794 }
8795 
8796 
8797 
8798 #if CONFIG_MACF
/* Set the filter mask for Mach traps. MACF hook; task takes the pointer as-is. */
void
mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_trap_filter_mask(task, maskptr);
}
8807 
/* Set the filter mask for kobject msgs. MACF hook; task takes the pointer as-is. */
void
mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_kobj_filter_mask(task, maskptr);
}
8816 
8817 /* Hook for mach trap/sc filter evaluation policy. */
8818 mac_task_mach_filter_cbfunc_t mac_task_mach_trap_evaluate = NULL;
8819 
8820 /* Hook for kobj message filter evaluation policy. */
8821 mac_task_kobj_filter_cbfunc_t mac_task_kobj_msg_evaluate = NULL;
8822 
/*
 * Set the callback hooks for the filtering policy.
 *
 * Each hook may be registered at most once; returns KERN_FAILURE if the
 * corresponding hook is already set, KERN_SUCCESS otherwise.
 *
 * NOTE(review): if mach_cbfunc registers successfully but kobj_cbfunc
 * is already taken, the mach hook stays registered even though
 * KERN_FAILURE is returned - verify callers only ever register once.
 */
int
mac_task_register_filter_callbacks(
	const mac_task_mach_filter_cbfunc_t mach_cbfunc,
	const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
{
	if (mach_cbfunc != NULL) {
		if (mac_task_mach_trap_evaluate != NULL) {
			return KERN_FAILURE;
		}
		mac_task_mach_trap_evaluate = mach_cbfunc;
	}
	if (kobj_cbfunc != NULL) {
		if (mac_task_kobj_msg_evaluate != NULL) {
			return KERN_FAILURE;
		}
		mac_task_kobj_msg_evaluate = kobj_cbfunc;
	}

	return KERN_SUCCESS;
}
8844 #endif /* CONFIG_MACF */
8845 
8846 #if CONFIG_ROSETTA
/* Returns true if the task's proc is running under Rosetta translation. */
bool
task_is_translated(task_t task)
{
	extern boolean_t proc_is_translated(struct proc* p);
	return task && proc_is_translated(get_bsdtask_info(task));
}
8853 #endif
8854 
8855 
8856 #if __has_feature(ptrauth_calls)
8857 /* All pac violations will be delivered as fatal exceptions irrespective of
8858  * the enable_pac_exception boot-arg value.
8859  */
8860 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
8861 /*
8862  * When enable_pac_exception boot-arg is set to true, processes
8863  * can choose to get non-fatal pac exception delivery by setting
8864  * this entitlement.
8865  */
8866 #define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
8867 
8868 void
task_set_pac_exception_fatal_flag(task_t task)8869 task_set_pac_exception_fatal_flag(
8870 	task_t task)
8871 {
8872 	assert(task != TASK_NULL);
8873 	bool pac_entitlement = false;
8874 	uint32_t set_flags = 0;
8875 
8876 	if (enable_pac_exception && IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
8877 		return;
8878 	}
8879 
8880 	if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT)) {
8881 		pac_entitlement = true;
8882 	}
8883 
8884 	if (pac_entitlement) {
8885 		set_flags |= TFRO_PAC_ENFORCE_USER_STATE;
8886 	}
8887 	if (pac_entitlement || (enable_pac_exception && task_get_platform_binary(task))) {
8888 		set_flags |= TFRO_PAC_EXC_FATAL;
8889 	}
8890 	if (set_flags != 0) {
8891 		task_ro_flags_set(task, set_flags);
8892 	}
8893 }
8894 
/* Returns true if PAC violations are delivered as fatal exceptions for this task. */
bool
task_is_pac_exception_fatal(
	task_t task)
{
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
}
8902 #endif /* __has_feature(ptrauth_calls) */
8903 
/* Returns true if the task requires user-signed thread state (TFRO_PAC_ENFORCE_USER_STATE). */
bool
task_needs_user_signed_thread_state(
	task_t task)
{
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
}
8911 
/*
 * Set TF_TECS on the task (defaulting to the current task for
 * TASK_NULL) and apply machine_tecs() to each of its threads, when
 * machine_csv(CPUVN_CI) indicates the mitigation is required.
 */
void
task_set_tecs(task_t task)
{
	if (task == TASK_NULL) {
		task = current_task();
	}

	if (!machine_csv(CPUVN_CI)) {
		/* mitigation not needed on this machine */
		return;
	}

	/* we take the task lock below, so it must not already be held */
	LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);

	task_lock(task);

	task->t_flags |= TF_TECS;

	thread_t thread;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		machine_tecs(thread);
	}
	task_unlock(task);
}
8935 
/*
 * Test-only (DEVELOPMENT/DEBUG): block on a synchronous kernel upcall
 * to the given send port, then release the send right. Only valid for
 * the current task.
 */
kern_return_t
task_test_sync_upcall(
	task_t     task,
	ipc_port_t send_port)
{
#if DEVELOPMENT || DEBUG
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Block on sync kernel upcall on the given send port */
	mach_test_sync_upcall(send_port);

	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	return KERN_NOT_SUPPORTED;
#endif
}
8957 
/*
 * Test-only (DEVELOPMENT/DEBUG): attach QoS/IO-tier attributes to the
 * given send port and perform an asynchronous kernel upcall on it to
 * exercise attribute propagation. Only valid for the current task;
 * qos and iotier must be within their valid ranges.
 */
kern_return_t
task_test_async_upcall_propagation(
	task_t      task,
	ipc_port_t  send_port,
	int         qos,
	int         iotier)
{
#if DEVELOPMENT || DEBUG
	kern_return_t kr;

	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (qos < THREAD_QOS_DEFAULT || qos > THREAD_QOS_USER_INTERACTIVE ||
	    iotier < THROTTLE_LEVEL_START || iotier > THROTTLE_LEVEL_END) {
		return KERN_INVALID_ARGUMENT;
	}

	struct thread_attr_for_ipc_propagation attr = {
		.tafip_iotier = iotier,
		.tafip_qos = qos
	};

	/* Apply propagate attr to port */
	kr = ipc_port_propagate_thread_attr(send_port, attr);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* importance boost only for the duration of the upcall */
	thread_enable_send_importance(current_thread(), TRUE);

	/* Perform an async kernel upcall on the given send port */
	mach_test_async_upcall(send_port);
	thread_enable_send_importance(current_thread(), FALSE);

	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	(void)qos;
	(void)iotier;
	return KERN_NOT_SUPPORTED;
#endif
}
9004 
9005 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Allocate the resource-limit fatal port and copy a send right into the
 * current task's IPC space, returning the resulting port name (0 if no
 * fatal port could be allocated).
 *
 * NOTE(review): the ipc_object_copyout() return value is not checked;
 * presumably port_name stays 0 on failure - verify against the copyout
 * contract.
 */
mach_port_name_t
current_task_get_fatal_port_name(void)
{
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_port_name_t port_name = 0;

	task_fatal_port = task_allocate_fatal_port();

	if (task_fatal_port) {
		ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
		    IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
	}

	return port_name;
}
9021 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
9022 
9023 #if defined(__x86_64__)
9024 bool
curtask_get_insn_copy_optout(void)9025 curtask_get_insn_copy_optout(void)
9026 {
9027 	bool optout;
9028 	task_t cur_task = current_task();
9029 
9030 	task_lock(cur_task);
9031 	optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9032 	task_unlock(cur_task);
9033 
9034 	return optout;
9035 }
9036 
/*
 * Opt the current task out of instruction-stream copying: set
 * TF_INSN_COPY_OPTOUT and apply the machine-level opt-out to every
 * existing thread, all under the task lock.
 */
void
curtask_set_insn_copy_optout(void)
{
	task_t cur_task = current_task();

	task_lock(cur_task);

	cur_task->t_flags |= TF_INSN_COPY_OPTOUT;

	thread_t thread;
	queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
		machine_thread_set_insn_copy_optout(thread);
	}
	task_unlock(cur_task);
}
9052 #endif /* defined(__x86_64__) */
9053 
9054 void
task_get_corpse_vmobject_list(task_t task,vmobject_list_output_t * list,size_t * list_size)9055 task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9056 {
9057 	assert(task);
9058 	assert(list_size);
9059 
9060 	*list = task->corpse_vmobject_list;
9061 	*list_size = (size_t)task->corpse_vmobject_list_size;
9062 }
9063 
/* Panic helper: the proc_ro's task back-reference does not point at this task. */
__abortlike
static void
panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
{
	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
	    "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
}
9071 
/*
 * Return the task's read-only proc_ro structure, after validating that
 * it lives in the proc_ro zone and that its task back-reference points
 * back at this task (defense against pointer corruption).
 */
proc_ro_t
task_get_ro(task_t t)
{
	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;

	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(proc_ro_task(ro) != t)) {
		panic_proc_ro_task_backref_mismatch(t, ro);
	}

	return ro;
}
9084 
/* Returns the task's read-only flags (t_flags_ro). */
uint32_t
task_ro_flags_get(task_t task)
{
	return task_get_ro(task)->t_flags_ro;
}
9090 
/* Atomically OR `flags` into the task's read-only flags via the RO zone. */
void
task_ro_flags_set(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_OR_32, flags);
}
9097 
/* Atomically clear `flags` from the task's read-only flags via the RO zone. */
void
task_ro_flags_clear(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
}
9104 
/* Returns the task's control-port options from its read-only data. */
task_control_port_options_t
task_get_control_port_options(task_t task)
{
	return task_get_ro(task)->task_control_port_options;
}
9110 
/* Store new control-port options into the task's read-only data. */
void
task_set_control_port_options(task_t task, task_control_port_options_t opts)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_control_port_options, &opts);
}
9117 
9118 /*!
9119  * @function kdp_task_is_locked
9120  *
9121  * @abstract
9122  * Checks if task is locked.
9123  *
9124  * @discussion
9125  * NOT SAFE: To be used only by kernel debugger.
9126  *
9127  * @param task task to check
9128  *
9129  * @returns TRUE if the task is locked.
9130  */
9131 boolean_t
kdp_task_is_locked(task_t task)9132 kdp_task_is_locked(task_t task)
9133 {
9134 	return kdp_lck_mtx_lock_spin_is_acquired(&task->lock);
9135 }
9136