xref: /xnu-10063.141.1/osfmk/kern/task.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_FREE_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/task.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59  *		David Black
60  *
61  *	Task management primitives implementation.
62  */
63 /*
64  * Copyright (c) 1993 The University of Utah and
65  * the Computer Systems Laboratory (CSL).  All rights reserved.
66  *
67  * Permission to use, copy, modify and distribute this software and its
68  * documentation is hereby granted, provided that both the copyright
69  * notice and this permission notice appear in all copies of the
70  * software, derivative works or modified versions, and any portions
71  * thereof, and that both notices appear in supporting documentation.
72  *
73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76  *
77  * CSL requests users of this software to return to [email protected] any
78  * improvements that they make and grant CSL redistribution rights.
79  *
80  */
81 /*
82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83  * support for mandatory and extensible security protections.  This notice
84  * is included in support of clause 2.2 (b) of the Apple Public License,
85  * Version 2.0.
86  * Copyright (c) 2005 SPARTA, Inc.
87  */
88 
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100 #include <mach/mach_test_upcall.h>
101 
102 #include <ipc/ipc_importance.h>
103 #include <ipc/ipc_types.h>
104 #include <ipc/ipc_space.h>
105 #include <ipc/ipc_entry.h>
106 #include <ipc/ipc_hash.h>
107 #include <ipc/ipc_init.h>
108 
109 #include <kern/kern_types.h>
110 #include <kern/mach_param.h>
111 #include <kern/misc_protos.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/coalition.h>
115 #include <kern/zalloc.h>
116 #include <kern/kalloc.h>
117 #include <kern/kern_cdata.h>
118 #include <kern/processor.h>
119 #include <kern/recount.h>
120 #include <kern/sched_prim.h>    /* for thread_wakeup */
121 #include <kern/ipc_tt.h>
122 #include <kern/host.h>
123 #include <kern/clock.h>
124 #include <kern/timer.h>
125 #include <kern/assert.h>
126 #include <kern/affinity.h>
127 #include <kern/exc_resource.h>
128 #include <kern/machine.h>
129 #include <kern/policy_internal.h>
130 #include <kern/restartable.h>
131 #include <kern/ipc_kobject.h>
132 
133 #include <corpses/task_corpse.h>
134 #if CONFIG_TELEMETRY
135 #include <kern/telemetry.h>
136 #endif
137 
138 #if CONFIG_PERVASIVE_CPI
139 #include <kern/monotonic.h>
140 #include <machine/monotonic.h>
141 #endif /* CONFIG_PERVASIVE_CPI */
142 
143 #if CONFIG_EXCLAVES
144 #include "exclaves_boot.h"
145 #include "exclaves_resource.h"
146 #include "exclaves_boot.h"
147 #include "kern/exclaves.tightbeam.h"
148 #endif /* CONFIG_EXCLAVES */
149 
150 #include <os/log.h>
151 
152 #include <vm/pmap.h>
153 #include <vm/vm_map.h>
154 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
155 #include <vm/vm_pageout.h>
156 #include <vm/vm_protos.h>
157 #include <vm/vm_purgeable_internal.h>
158 #include <vm/vm_compressor_pager.h>
159 #include <vm/vm_reclaim_internal.h>
160 
161 #include <sys/proc_ro.h>
162 #include <sys/resource.h>
163 #include <sys/signalvar.h> /* for coredump */
164 #include <sys/bsdtask_info.h>
165 #include <sys/kdebug_triage.h>
166 #include <sys/code_signing.h> /* for address_space_debugged */
167 /*
168  * Exported interfaces
169  */
170 
171 #include <mach/task_server.h>
172 #include <mach/mach_host_server.h>
173 #include <mach/mach_port_server.h>
174 
175 #include <vm/vm_shared_region.h>
176 
177 #include <libkern/OSDebug.h>
178 #include <libkern/OSAtomic.h>
179 #include <libkern/section_keywords.h>
180 
181 #include <mach-o/loader.h>
182 #include <kdp/kdp_dyld.h>
183 
184 #include <kern/sfi.h>           /* picks up ledger.h */
185 
186 #if CONFIG_MACF
187 #include <security/mac_mach_internal.h>
188 #endif
189 
190 #include <IOKit/IOBSD.h>
191 #include <kdp/processor_core.h>
192 
193 #include <string.h>
194 
195 #if KPERF
196 extern int kpc_force_all_ctrs(task_t, int);
197 #endif
198 
199 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
200 
201 int64_t         next_taskuniqueid = 0;
202 const size_t task_alignment = _Alignof(struct task);
203 extern const size_t proc_alignment;
204 extern size_t proc_struct_size;
205 extern size_t proc_and_task_size;
206 size_t task_struct_size;
207 
208 extern uint32_t ipc_control_port_options;
209 
210 extern int large_corpse_count;
211 
212 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
213 extern void task_disown_frozen_csegs(task_t owner_task);
214 
215 static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
216 static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
217 static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
218 static inline void task_zone_init(void);
219 
220 #if CONFIG_EXCLAVES
221 static bool task_should_panic_on_exit_due_to_conclave_taint(task_t task);
222 static bool task_is_conclave_tainted(task_t task);
223 static void task_set_conclave_taint(task_t task);
224 kern_return_t task_crash_info_conclave_upcall(task_t task,
225     const xnuupcalls_conclavesharedbuffer_s *shared_buf, uint32_t length);
226 kern_return_t
227 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *_Nonnull result, void *kcdata_ptr);
228 #endif /* CONFIG_EXCLAVES */
229 
230 IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
231 IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
232     .iko_op_no_senders = task_port_no_senders);
233 IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
234     .iko_op_no_senders = task_port_with_flavor_no_senders);
235 IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
236     .iko_op_no_senders = task_port_with_flavor_no_senders);
237 IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
238     .iko_op_no_senders = task_suspension_no_senders);
239 
240 #if CONFIG_PROC_RESOURCE_LIMITS
241 static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
242 static mach_port_t task_allocate_fatal_port(void);
243 
244 IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
245     .iko_op_stable     = true,
246     .iko_op_no_senders = task_fatal_port_no_senders);
247 
248 extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
249 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
250 
251 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
252 int audio_active = 0;
253 
254 /*
255  *	structure for tracking zone usage
256  *	Used either one per task/thread for all zones or <per-task,per-zone>.
257  */
258 typedef struct zinfo_usage_store_t {
259 	/* These fields may be updated atomically, and so must be 8 byte aligned */
260 	uint64_t        alloc __attribute__((aligned(8)));              /* allocation counter */
261 	uint64_t        free __attribute__((aligned(8)));               /* free counter */
262 } zinfo_usage_store_t;
263 
264 /**
265  * Return codes related to diag threshold and memory limit
266  */
267 __options_decl(diagthreshold_check_return, int, {
268 	THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED        = 0,
269 	THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED         = 1,
270 	THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED    = 2,
271 	THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED     = 3,
272 });
273 
274 /**
275  * Return codes related to diag threshold and memory limit
276  */
277 __options_decl(current_, int, {
278 	THRESHOLD_IS_SAME_AS_LIMIT      = 0,
279 	THRESHOLD_IS_NOT_SAME_AS_LIMIT  = 1
280 });
281 
282 zinfo_usage_store_t tasks_tkm_private;
283 zinfo_usage_store_t tasks_tkm_shared;
284 
285 /* A container to accumulate statistics for expired tasks */
286 expired_task_statistics_t               dead_task_statistics;
287 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
288 
289 ledger_template_t task_ledger_template = NULL;
290 
291 /* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
292 LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
293 LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
294 
295 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
296 {.cpu_time = -1,
297  .tkm_private = -1,
298  .tkm_shared = -1,
299  .phys_mem = -1,
300  .wired_mem = -1,
301  .internal = -1,
302  .iokit_mapped = -1,
303  .external = -1,
304  .reusable = -1,
305  .alternate_accounting = -1,
306  .alternate_accounting_compressed = -1,
307  .page_table = -1,
308  .phys_footprint = -1,
309  .internal_compressed = -1,
310  .purgeable_volatile = -1,
311  .purgeable_nonvolatile = -1,
312  .purgeable_volatile_compressed = -1,
313  .purgeable_nonvolatile_compressed = -1,
314  .tagged_nofootprint = -1,
315  .tagged_footprint = -1,
316  .tagged_nofootprint_compressed = -1,
317  .tagged_footprint_compressed = -1,
318  .network_volatile = -1,
319  .network_nonvolatile = -1,
320  .network_volatile_compressed = -1,
321  .network_nonvolatile_compressed = -1,
322  .media_nofootprint = -1,
323  .media_footprint = -1,
324  .media_nofootprint_compressed = -1,
325  .media_footprint_compressed = -1,
326  .graphics_nofootprint = -1,
327  .graphics_footprint = -1,
328  .graphics_nofootprint_compressed = -1,
329  .graphics_footprint_compressed = -1,
330  .neural_nofootprint = -1,
331  .neural_footprint = -1,
332  .neural_nofootprint_compressed = -1,
333  .neural_footprint_compressed = -1,
334  .platform_idle_wakeups = -1,
335  .interrupt_wakeups = -1,
336 #if CONFIG_SCHED_SFI
337  .sfi_wait_times = { 0 /* initialized at runtime */},
338 #endif /* CONFIG_SCHED_SFI */
339  .cpu_time_billed_to_me = -1,
340  .cpu_time_billed_to_others = -1,
341  .physical_writes = -1,
342  .logical_writes = -1,
343  .logical_writes_to_external = -1,
344 #if DEBUG || DEVELOPMENT
345  .pages_grabbed = -1,
346  .pages_grabbed_kern = -1,
347  .pages_grabbed_iopl = -1,
348  .pages_grabbed_upl = -1,
349 #endif
350 #if CONFIG_FREEZE
351  .frozen_to_swap = -1,
352 #endif /* CONFIG_FREEZE */
353  .energy_billed_to_me = -1,
354  .energy_billed_to_others = -1,
355 #if CONFIG_PHYS_WRITE_ACCT
356  .fs_metadata_writes = -1,
357 #endif /* CONFIG_PHYS_WRITE_ACCT */
358 #if CONFIG_MEMORYSTATUS
359  .memorystatus_dirty_time = -1,
360 #endif /* CONFIG_MEMORYSTATUS */
361  .swapins = -1,
362  .conclave_mem = -1, };
363 
364 /* System sleep state */
365 boolean_t tasks_suspend_state;
366 
367 __options_decl(send_exec_resource_is_fatal, bool, {
368 	IS_NOT_FATAL            = false,
369 	IS_FATAL                = true
370 });
371 
372 __options_decl(send_exec_resource_is_diagnostics, bool, {
373 	IS_NOT_DIAGNOSTICS      = false,
374 	IS_DIAGNOSTICS          = true
375 });
376 
377 __options_decl(send_exec_resource_is_warning, bool, {
378 	IS_NOT_WARNING          = false,
379 	IS_WARNING              = true
380 });
381 
382 __options_decl(send_exec_resource_options_t, uint8_t, {
383 	EXEC_RESOURCE_FATAL = 0x01,
384 	EXEC_RESOURCE_DIAGNOSTIC = 0x02,
385 	EXEC_RESOURCE_WARNING = 0x04,
386 });
387 
388 /**
389  * Actions to take when a process has reached the memory limit or the diagnostics threshold limits
390  */
391 static inline void task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning);
392 #if DEBUG || DEVELOPMENT
393 static inline void task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size);
394 #endif
395 void init_task_ledgers(void);
396 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
397 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
398 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
399 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
400 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options);
401 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
402 #if CONFIG_PROC_RESOURCE_LIMITS
403 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
404 mach_port_name_t current_task_get_fatal_port_name(void);
405 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit);
406 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
407 
408 kern_return_t task_suspend_internal_locked(task_t);
409 kern_return_t task_suspend_internal(task_t);
410 kern_return_t task_resume_internal_locked(task_t);
411 kern_return_t task_resume_internal(task_t);
412 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
413 
414 extern kern_return_t iokit_task_terminate(task_t task, int phase);
415 extern void          iokit_task_app_suspended_changed(task_t task);
416 
417 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
418 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
419 extern kern_return_t thread_resume(thread_t thread);
420 
421 extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
422 
423 // Condition to include diag footprints
424 #define RESETTABLE_DIAG_FOOTPRINT_LIMITS ((DEBUG || DEVELOPMENT) && CONFIG_MEMORYSTATUS)
425 
426 // Warn tasks when they hit 80% of their memory limit.
427 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
428 
429 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
430 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
431 
432 /*
433  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
434  *
435  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
436  *  stacktraces, aka micro-stackshots)
437  */
438 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
439 
440 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
441 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
442 
443 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
444 
445 TUNABLE(bool, disable_exc_resource, "disable_exc_resource", false); /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
446 TUNABLE(bool, disable_exc_resource_during_audio, "disable_exc_resource_during_audio", true); /* Global override to suppress EXC_RESOURCE while audio is active */
447 
448 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
449 unsigned int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
450 
451 /*
452  * Configure per-task memory limit.
453  * The boot-arg is interpreted as Megabytes,
454  * and takes precedence over the device tree.
455  * Setting the boot-arg to 0 disables task limits.
456  */
457 TUNABLE_DT_WRITEABLE(int, max_task_footprint_mb, "/defaults", "kern.max_task_pmem", "max_task_pmem", 0, TUNABLE_DT_NONE);
458 
459 /* I/O Monitor Limits */
460 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
461 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
462 
463 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
464 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
465 
466 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
467 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
468 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
469 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage*/
470 static boolean_t global_update_logical_writes(int64_t, int64_t*);
471 
472 #if DEBUG || DEVELOPMENT
473 static diagthreshold_check_return task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value);
474 #endif
475 #define TASK_MAX_THREAD_LIMIT 256
476 
477 #if MACH_ASSERT
478 int pmap_ledgers_panic = 1;
479 int pmap_ledgers_panic_leeway = 3;
480 #endif /* MACH_ASSERT */
481 
482 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
483 
484 #if CONFIG_COREDUMP
485 int hwm_user_cores = 0; /* high watermark violations generate user core files */
486 #endif
487 
488 #ifdef MACH_BSD
489 extern uint32_t proc_platform(const struct proc *);
490 extern uint32_t proc_sdk(struct proc *);
491 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
492 extern int      proc_pid(struct proc *p);
493 extern int      proc_selfpid(void);
494 extern struct proc *current_proc(void);
495 extern char     *proc_name_address(struct proc *p);
496 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
497 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
498 extern void workq_proc_suspended(struct proc *p);
499 extern void workq_proc_resumed(struct proc *p);
500 extern struct proc *kernproc;
501 
502 #if CONFIG_MEMORYSTATUS
503 extern void     proc_memstat_skip(struct proc* p, boolean_t set);
504 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, bool memlimit_is_active, bool memlimit_is_fatal);
505 extern void     memorystatus_log_exception(const int max_footprint_mb, bool memlimit_is_active, bool memlimit_is_fatal);
506 extern void     memorystatus_log_diag_threshold_exception(const int diag_threshold_value);
507 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
508 extern uint64_t  memorystatus_available_memory_internal(struct proc *p);
509 
510 #if DEVELOPMENT || DEBUG
511 extern void memorystatus_abort_vm_map_fork(task_t);
512 #endif
513 
514 #endif /* CONFIG_MEMORYSTATUS */
515 
516 #endif /* MACH_BSD */
517 
518 /* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
519 static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
520 
521 /*
522  * Defaults for controllable EXC_GUARD behaviors
523  *
524  * Internal builds are fatal by default (except BRIDGE).
525  * Create an alternate set of defaults for special processes by name.
526  */
527 struct task_exc_guard_named_default {
528 	char *name;
529 	uint32_t behavior;
530 };
531 #define _TASK_EXC_GUARD_MP_CORPSE  (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
532 #define _TASK_EXC_GUARD_MP_ONCE    (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
533 #define _TASK_EXC_GUARD_MP_FATAL   (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)
534 
535 #define _TASK_EXC_GUARD_VM_CORPSE  (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE)
536 #define _TASK_EXC_GUARD_VM_ONCE    (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
537 #define _TASK_EXC_GUARD_VM_FATAL   (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)
538 
539 #define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
540 #define _TASK_EXC_GUARD_ALL_ONCE   (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
541 #define _TASK_EXC_GUARD_ALL_FATAL  (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
542 
543 /* cannot turn off FATAL and DELIVER bit if set */
544 uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL | TASK_EXC_GUARD_VM_FATAL |
545     TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_VM_DELIVER;
546 /* cannot turn on ONCE bit if unset */
547 uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_VM_ONCE;
548 
549 #if !defined(XNU_TARGET_OS_BRIDGE)
550 
551 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
552 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
553 /*
554  * These "by-process-name" default overrides are intended to be a short-term fix to
555  * quickly get over races between changes introducing new EXC_GUARD raising behaviors
556  * in some process and a change in default behavior for same. We should ship with
557  * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
558  * exception behavior via task_set_exc_guard_behavior()).
559  *
560  * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
561  * task_exc_guard_default when transitioning this list between empty and
562  * non-empty.
563  */
564 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
565 
566 #else /* !defined(XNU_TARGET_OS_BRIDGE) */
567 
568 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
569 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
570 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
571 
572 #endif /* !defined(XNU_TARGET_OS_BRIDGE) */
573 
574 /* Forwards */
575 
576 static void task_hold_locked(task_t task);
577 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
578 static void task_release_locked(task_t task);
579 extern task_t proc_get_task_raw(void *proc);
580 extern void task_ref_hold_proc_task_struct(task_t task);
581 extern void task_release_proc_task_struct(task_t task, proc_ro_t proc_ro);
582 
583 static void task_synchronizer_destroy_all(task_t task);
584 static os_ref_count_t
585 task_add_turnstile_watchports_locked(
586 	task_t                      task,
587 	struct task_watchports      *watchports,
588 	struct task_watchport_elem  **previous_elem_array,
589 	ipc_port_t                  *portwatch_ports,
590 	uint32_t                    portwatch_count);
591 
592 static os_ref_count_t
593 task_remove_turnstile_watchports_locked(
594 	task_t                 task,
595 	struct task_watchports *watchports,
596 	ipc_port_t             *port_freelist);
597 
598 static struct task_watchports *
599 task_watchports_alloc_init(
600 	task_t        task,
601 	thread_t      thread,
602 	uint32_t      count);
603 
604 static void
605 task_watchports_deallocate(
606 	struct task_watchports *watchports);
607 
608 __attribute__((always_inline)) inline void
task_lock(task_t task)609 task_lock(task_t task)
610 {
611 	lck_mtx_lock(&(task)->lock);
612 }
613 
614 __attribute__((always_inline)) inline void
task_unlock(task_t task)615 task_unlock(task_t task)
616 {
617 	lck_mtx_unlock(&(task)->lock);
618 }
619 
/*
 * task_set_64bit:
 *
 * Update the task's 64-bit address-space and 64-bit register-state flags,
 * and (on x86_64/arm64) resynchronize every thread's machine address mode
 * when the register-state flag actually changes.
 *
 * Called and returns with the task lock not held; takes it internally.
 */
void
task_set_64bit(
	task_t task,
	boolean_t is_64bit,
	boolean_t is_64bit_data)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
	thread_t thread;
#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */

	task_lock(task);

	/*
	 * Switching to/from 64-bit address spaces
	 */
	if (is_64bit) {
		if (!task_has_64Bit_addr(task)) {
			task_set_64Bit_addr(task);
		}
	} else {
		if (task_has_64Bit_addr(task)) {
			task_clear_64Bit_addr(task);
		}
	}

	/*
	 * Switching to/from 64-bit register state.
	 */
	if (is_64bit_data) {
		if (task_has_64Bit_data(task)) {
			/* already 64-bit data: threads need no update */
			goto out;
		}

		task_set_64Bit_data(task);
	} else {
		if (!task_has_64Bit_data(task)) {
			/* already 32-bit data: threads need no update */
			goto out;
		}

		task_clear_64Bit_data(task);
	}

	/* FIXME: On x86, the thread save state flavor can diverge from the
	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
	 * state dichotomy. Since we can be pre-empted in this interval,
	 * certain routines may observe the thread as being in an inconsistent
	 * state with respect to its task's 64-bitness.
	 */

#if defined(__x86_64__) || defined(__arm64__)
	/* Flip each thread's machine address mode to match the new data flag. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		machine_thread_switch_addrmode(thread);
		thread_mtx_unlock(thread);
	}
#endif /* defined(__x86_64__) || defined(__arm64__) */

out:
	task_unlock(task);
}
680 
681 bool
task_get_64bit_addr(task_t task)682 task_get_64bit_addr(task_t task)
683 {
684 	return task_has_64Bit_addr(task);
685 }
686 
687 bool
task_get_64bit_data(task_t task)688 task_get_64bit_data(task_t task)
689 {
690 	return task_has_64Bit_data(task);
691 }
692 
693 void
task_set_platform_binary(task_t task,boolean_t is_platform)694 task_set_platform_binary(
695 	task_t task,
696 	boolean_t is_platform)
697 {
698 	if (is_platform) {
699 		task_ro_flags_set(task, TFRO_PLATFORM);
700 	} else {
701 		task_ro_flags_clear(task, TFRO_PLATFORM);
702 	}
703 }
704 
705 #if XNU_TARGET_OS_OSX
706 #if DEVELOPMENT || DEBUG
707 SECURITY_READ_ONLY_LATE(bool) AMFI_bootarg_disable_mach_hardening = false;
708 #endif /* DEVELOPMENT || DEBUG */
709 
/* Opt the task out of mach hardening by setting the read-only
 * TFRO_MACH_HARDENING_OPT_OUT flag (macOS only; see enclosing #if). */
void
task_disable_mach_hardening(task_t task)
{
	task_ro_flags_set(task, TFRO_MACH_HARDENING_OPT_OUT);
}
715 
716 bool
task_opted_out_mach_hardening(task_t task)717 task_opted_out_mach_hardening(task_t task)
718 {
719 	if (!task) {
720 		return false;
721 	}
722 	return task_ro_flags_get(task) & TFRO_MACH_HARDENING_OPT_OUT;
723 }
724 #endif /* XNU_TARGET_OS_OSX */
725 
726 /*
 * Use the `task_is_hardened_binary` function below
728  * when applying new security policies.
729  *
730  * Kernel security policies now generally apply to
731  * "hardened binaries" - which are platform binaries, and
732  * third party binaries who adopt hardened runtime on ios.
733  */
734 boolean_t
task_get_platform_binary(task_t task)735 task_get_platform_binary(task_t task)
736 {
737 	return (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
738 }
739 
740 static boolean_t
task_get_hardened_runtime(task_t task)741 task_get_hardened_runtime(task_t task)
742 {
743 	return (task_ro_flags_get(task) & TFRO_HARDENED) != 0;
744 }
745 
746 boolean_t
task_is_hardened_binary(task_t task)747 task_is_hardened_binary(task_t task)
748 {
749 	return task_get_platform_binary(task) ||
750 	       task_get_hardened_runtime(task);
751 }
752 
753 void
task_set_hardened_runtime(task_t task,bool is_hardened)754 task_set_hardened_runtime(
755 	task_t task,
756 	bool is_hardened)
757 {
758 	if (is_hardened) {
759 		task_ro_flags_set(task, TFRO_HARDENED);
760 	} else {
761 		task_ro_flags_clear(task, TFRO_HARDENED);
762 	}
763 }
764 
765 boolean_t
task_is_a_corpse(task_t task)766 task_is_a_corpse(task_t task)
767 {
768 	return (task_ro_flags_get(task) & TFRO_CORPSE) != 0;
769 }
770 
/* Report the task's ipc_active flag. */
boolean_t
task_is_ipc_active(task_t task)
{
	return task->ipc_active;
}
776 
777 void
task_set_corpse(task_t task)778 task_set_corpse(task_t task)
779 {
780 	return task_ro_flags_set(task, TFRO_CORPSE);
781 }
782 
/* Thin wrapper: delegate to ipc_task_set_immovable_pinned() for this task. */
void
task_set_immovable_pinned(task_t task)
{
	ipc_task_set_immovable_pinned(task);
}
788 
789 /*
790  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
791  * Returns "false" if flag is already set, and "true" in other cases.
792  */
793 bool
task_set_ca_client_wi(task_t task,boolean_t set_or_clear)794 task_set_ca_client_wi(
795 	task_t task,
796 	boolean_t set_or_clear)
797 {
798 	bool ret = true;
799 	task_lock(task);
800 	if (set_or_clear) {
801 		/* Tasks can have only one CA_CLIENT work interval */
802 		if (task->t_flags & TF_CA_CLIENT_WI) {
803 			ret = false;
804 		} else {
805 			task->t_flags |= TF_CA_CLIENT_WI;
806 		}
807 	} else {
808 		task->t_flags &= ~TF_CA_CLIENT_WI;
809 	}
810 	task_unlock(task);
811 	return ret;
812 }
813 
814 /*
815  * task_set_dyld_info() is called at most three times.
816  * 1) at task struct creation to set addr/size to zero.
817  * 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
818  * 3) is from dyld itself to update location of all_image_info
 * For security any calls after that are ignored.  The TF_DYLD_ALL_IMAGE_FINAL bit is used to determine state.
820  */
821 kern_return_t
task_set_dyld_info(task_t task,mach_vm_address_t addr,mach_vm_size_t size)822 task_set_dyld_info(
823 	task_t            task,
824 	mach_vm_address_t addr,
825 	mach_vm_size_t    size)
826 {
827 	mach_vm_address_t end;
828 	if (os_add_overflow(addr, size, &end)) {
829 		return KERN_FAILURE;
830 	}
831 
832 	task_lock(task);
833 	/* don't accept updates if all_image_info_addr is final */
834 	if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == 0) {
835 		bool inputNonZero   = ((addr != 0) || (size != 0));
836 		bool currentNonZero = ((task->all_image_info_addr != 0) || (task->all_image_info_size != 0));
837 		task->all_image_info_addr = addr;
838 		task->all_image_info_size = size;
839 		/* can only change from a non-zero value to another non-zero once */
840 		if (inputNonZero && currentNonZero) {
841 			task->t_flags |= TF_DYLD_ALL_IMAGE_FINAL;
842 		}
843 		task_unlock(task);
844 		return KERN_SUCCESS;
845 	} else {
846 		task_unlock(task);
847 		return KERN_FAILURE;
848 	}
849 }
850 
/*
 * Return whether this task donates its own pages
 * (reads the task's donates_own_pages field; no locking performed).
 */
bool
task_donates_own_pages(
	task_t task)
{
	return task->donates_own_pages;
}
857 
/*
 * Record the VM address of the task's Mach-O header.
 * Takes the task lock around the store.
 */
void
task_set_mach_header_address(
	task_t task,
	mach_vm_address_t addr)
{
	task_lock(task);
	task->mach_header_vm_address = addr;
	task_unlock(task);
}
867 
/*
 * Tear down the task's bank context, if one exists.
 * NOTE(review): the parameter is marked __unused but is clearly read
 * below — presumably a leftover from a configuration where the body
 * compiled away; harmless, but worth confirming.
 */
void
task_bank_reset(__unused task_t task)
{
	if (task->bank_context != NULL) {
		bank_task_destroy(task);
	}
}
875 
/*
 * Initialize the task's bank context.
 *
 * NOTE: This should only be called when the P_LINTRANSIT
 *	 flag is set (the proc_trans lock is held) on the
 *	 proc associated with the task.
 *
 * Panics if a bank context already exists: initialization must happen
 * exactly once per task.
 */
void
task_bank_init(__unused task_t task)
{
	if (task->bank_context != NULL) {
		panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
	}
	bank_task_initialize(task);
}
889 
/*
 * Mark the task as having performed an exec (sets TPF_DID_EXEC).
 * No locking; caller is expected to serialize (TODO confirm caller context).
 */
void
task_set_did_exec_flag(task_t task)
{
	task->t_procflags |= TPF_DID_EXEC;
}
895 
/*
 * Clear the "exec copy" marker (TPF_EXEC_COPY) from the task.
 * No locking; caller is expected to serialize (TODO confirm caller context).
 */
void
task_clear_exec_copy_flag(task_t task)
{
	task->t_procflags &= ~TPF_EXEC_COPY;
}
901 
/*
 * Return the wait event used for the task's return-wait handshake.
 * The address of the returnwait_inheritor field serves as the event,
 * so it is unique per task and stable for the task's lifetime.
 */
event_t
task_get_return_wait_event(task_t task)
{
	return (event_t)&task->returnwait_inheritor;
}
907 
/*
 * Release threads parked in the task's return-wait handshake
 * (see task_wait_to_return()).
 *
 * flags selects which stage(s) to clear:
 *   TCRW_CLEAR_INITIAL_WAIT  - plain wakeup on the return-wait event.
 *   TCRW_CLEAR_FINAL_WAIT    - clear TRW_LRETURNWAIT under the IPC-space
 *                              write lock and, if a waiter is recorded,
 *                              wake it through the turnstile so priority
 *                              inheritance is resolved.
 *   TCRW_CLEAR_EXEC_COMPLETE - additionally clear TRW_LEXEC_COMPLETE
 *                              (only meaningful with FINAL_WAIT).
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		/* The IPC-space write lock serializes the returnwait state. */
		is_write_lock(task->itk_space);

		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (flags & TCRW_CLEAR_EXEC_COMPLETE) {
			task->t_returnwaitflags &= ~TRW_LEXEC_COMPLETE;
		}

		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
			    TURNSTILE_ULOCK);

			/* Wake all waiters and hand off the inheritor in one step. */
			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
942 
/*
 * Park the current thread until its task's return-wait is cleared
 * (see task_clear_return_wait()), then return to userspace via
 * thread_bootstrap_return(). Never returns to the caller.
 *
 * While TRW_LRETURNWAIT is set, the thread blocks on a turnstile keyed
 * by the task's return-wait event, donating its priority to the
 * recorded returnwait_inheritor thread.
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();
	uint8_t returnwaitflags;

	/* The IPC-space write lock serializes the returnwait state. */
	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
		    TURNSTILE_ULOCK);

		do {
			/* Tell the clearing side somebody is waiting. */
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			/* Must drop the lock before actually blocking. */
			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			is_write_lock(task->itk_space);
			/* Re-check under the lock: guards against spurious wakeups. */
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
	}

	/* Snapshot the flags before dropping the lock. */
	returnwaitflags = task->t_returnwaitflags;
	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process
	 * to execute any code, make sure its credentials are cached,
	 * and notify any interested parties.
	 */
	extern void current_cached_proc_cred_update(void);

	current_cached_proc_cred_update();
	if (returnwaitflags & TRW_LEXEC_COMPLETE) {
		mac_proc_notify_exec_complete(current_proc());
	}
#endif

	thread_bootstrap_return();
}
997 
/*
 * Return whether this task is an exec-copy task
 * (delegates to task_is_exec_copy_internal()).
 */
boolean_t
task_is_exec_copy(task_t task)
{
	return task_is_exec_copy_internal(task);
}
1003 
/*
 * Return whether this task has performed an exec
 * (delegates to task_did_exec_internal()).
 */
boolean_t
task_did_exec(task_t task)
{
	return task_did_exec_internal(task);
}
1009 
/*
 * Return the task's active flag (unlocked snapshot read).
 */
boolean_t
task_is_active(task_t task)
{
	return task->active;
}
1015 
/*
 * Return the task's halting flag (unlocked snapshot read).
 */
boolean_t
task_is_halting(task_t task)
{
	return task->halting;
}
1021 
/*
 * One-time boot initialization of the task subsystem:
 *   - apply the max_task_footprint_mb limit and derive the footprint
 *     warning level (CONFIG_MEMORYSTATUS only);
 *   - parse boot-args for core-dump, CPU/wakeups/IO monitor tunables;
 *   - ensure the task ledger template exists (coalition_init() creates
 *     it when coalitions are configured);
 *   - create the proc_task zone and the kernel task itself.
 */
void
task_init(void)
{
	if (max_task_footprint_mb != 0) {
#if CONFIG_MEMORYSTATUS
		/* Clamp the configured limit to a 50 MB floor. */
		if (max_task_footprint_mb < 50) {
			printf("Warning: max_task_pmem %d below minimum.\n",
			    max_task_footprint_mb);
			max_task_footprint_mb = 50;
		}
		printf("Limiting task physical memory footprint to %d MB\n",
		    max_task_footprint_mb);

		max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024;         // Convert MB to bytes

		/*
		 * Configure the per-task memory limit warning level.
		 * This is computed as a percentage.
		 */
		max_task_footprint_warning_level = 0;

		if (max_mem < 0x40000000) {
			/*
			 * On devices with < 1GB of memory:
			 *    -- set warnings to 50MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 50) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
			}
		} else {
			/*
			 * On devices with >= 1GB of memory:
			 *    -- set warnings to 100MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 100) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
			}
		}

		/*
		 * Never allow warning level to land below the default.
		 */
		if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
			max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
		}

		printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);

#else
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
#endif /* CONFIG_MEMORYSTATUS */
	}

#if DEVELOPMENT || DEBUG
	PE_parse_boot_argn("task_exc_guard_default",
	    &task_exc_guard_default,
	    sizeof(task_exc_guard_default));
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_COREDUMP
	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
	    sizeof(hwm_user_cores))) {
		hwm_user_cores = 0;
	}
#endif

	proc_init_cpumon_params();

	/* Wakeups/IO monitor tunables fall back to defaults when no boot-arg. */
	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
	    sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
	}

	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
		task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
		task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
		io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
	}

/*
 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 * sets up the ledgers for the default coalition. If we don't have coalitions,
 * then we have to call it now.
 */
#if CONFIG_COALITIONS
	assert(task_ledger_template);
#else /* CONFIG_COALITIONS */
	init_task_ledgers();
#endif /* CONFIG_COALITIONS */

	task_ref_init();
	task_zone_init();

#ifdef __LP64__
	boolean_t is_64bit = TRUE;
#else
	boolean_t is_64bit = FALSE;
#endif

	/* proc and task live in one combined zone element; see task_zone_init(). */
	kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK | Z_ZERO);
	kernel_task = proc_get_task_raw(kernproc);

	/*
	 * Create the kernel task as the first task.
	 */
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, is_64bit,
	    is_64bit, TF_NONE, TF_NONE, TPF_NONE, TWF_NONE, kernel_task) != KERN_SUCCESS) {
		panic("task_init");
	}

	ipc_task_enable(kernel_task);

#if defined(HAS_APPLE_PAC)
	kernel_task->rop_pid = ml_default_rop_pid();
	kernel_task->jop_pid = ml_default_jop_pid();
	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
	ml_task_set_disable_user_jop(kernel_task, FALSE);
#endif

	/* Replace the map task_create_internal built with the real kernel_map. */
	vm_map_deallocate(kernel_task->map);
	kernel_task->map = kernel_map;
}
1159 
/*
 * Create the combined proc+task zone. Each element holds a proc followed
 * by its task; both sub-structure sizes are rounded up so each part is
 * aligned for the other's requirements.
 */
static inline void
task_zone_init(void)
{
	proc_struct_size = roundup(proc_struct_size, task_alignment);
	task_struct_size = roundup(sizeof(struct task), proc_alignment);
	proc_and_task_size = proc_struct_size + task_struct_size;

	proc_task_zone = zone_create_ext("proc_task", proc_and_task_size,
	    ZC_ZFREE_CLEARMEM | ZC_SEQUESTER, ZONE_ID_PROC_TASK, NULL); /* sequester is needed for proc_rele() */
}
1170 
1171 /*
1172  * Task ledgers
1173  * ------------
1174  *
1175  * phys_footprint
1176  *   Physical footprint: This is the sum of:
1177  *     + (internal - alternate_accounting)
1178  *     + (internal_compressed - alternate_accounting_compressed)
1179  *     + iokit_mapped
1180  *     + purgeable_nonvolatile
1181  *     + purgeable_nonvolatile_compressed
1182  *     + page_table
1183  *
1184  * internal
1185  *   The task's anonymous memory, which on iOS is always resident.
1186  *
1187  * internal_compressed
1188  *   Amount of this task's internal memory which is held by the compressor.
1189  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1190  *   and could be either decompressed back into memory, or paged out to storage, depending
1191  *   on our implementation.
1192  *
1193  * iokit_mapped
 *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 *    clean/dirty or internal/external state.
1196  *
1197  * alternate_accounting
1198  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1199  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1200  *   double counting.
1201  *
1202  * pages_grabbed
1203  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1204  *   which track UPL, IOPL and Kernel page grabs.
1205  */
/*
 * Build the per-task ledger template (see the block comment above for
 * the meaning of the individual entries). Must run exactly once, before
 * the kernel task is created; the completed template is published in
 * task_ledger_template for ledger_instantiate() at task creation.
 * Panics if the template or any entry cannot be created.
 */
void
init_task_ledgers(void)
{
	ledger_template_t t;

	/* One-shot: must run before any task (including kernel_task) exists. */
	assert(task_ledger_template == NULL);
	assert(kernel_task == TASK_NULL);

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_ledgers_panic",
	    &pmap_ledgers_panic,
	    sizeof(pmap_ledgers_panic));
	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
	    &pmap_ledgers_panic_leeway,
	    sizeof(pmap_ledgers_panic_leeway));
#endif /* MACH_ASSERT */

	if ((t = ledger_template_create("Per-task ledger")) == NULL) {
		panic("couldn't create task ledger template");
	}

	/*
	 * Register every ledger entry. Each call returns the entry's index
	 * (negative on failure; validated in bulk below).
	 */
	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
	    "physmem", "bytes");
	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
	    "bytes");
	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
	    "bytes");
	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
	    "bytes");
	task_ledgers.conclave_mem = ledger_entry_add_with_flags(t, "conclave_mem", "physmem", "count",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE | LEDGER_ENTRY_ALLOW_DEBIT);
	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
	    "bytes");
	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(t, "iokit_mapped", "mappings",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(t, "alternate_accounting", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(t, "alternate_accounting_compressed", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.page_table = ledger_entry_add_with_flags(t, "page_table", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
	    "bytes");
	task_ledgers.reusable = ledger_entry_add(t, "reusable", "physmem", "bytes");
	task_ledgers.external = ledger_entry_add(t, "external", "physmem", "bytes");
	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(t, "purgeable_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(t, "purgeable_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(t, "purgeable_volatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(t, "purgeable_nonvolatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#if DEBUG || DEVELOPMENT
	/* pages_grabbed counters exist only on DEBUG/DEVELOPMENT builds. */
	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#endif
	/* Per-tag (tagged/network/media/graphics/neural) footprint entries. */
	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(t, "tagged_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(t, "tagged_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(t, "tagged_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(t, "tagged_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile = ledger_entry_add_with_flags(t, "network_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(t, "network_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(t, "network_volatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(t, "network_nonvolatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(t, "media_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint = ledger_entry_add_with_flags(t, "media_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(t, "media_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(t, "media_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(t, "graphics_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(t, "graphics_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(t, "graphics_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(t, "graphics_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(t, "neural_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint = ledger_entry_add_with_flags(t, "neural_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(t, "neural_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(t, "neural_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

#if CONFIG_FREEZE
	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
#endif /* CONFIG_FREEZE */

	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");

#if CONFIG_SCHED_SFI
	sfi_class_id_t class_id, ledger_alias;
	/* Start every SFI wait-time slot as "unregistered" (-1). */
	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		task_ledgers.sfi_wait_times[class_id] = -1;
	}

	/* don't account for UNSPECIFIED */
	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
			/* Check to see if alias has been registered yet */
			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
			} else {
				/* Otherwise, initialize it first */
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
			}
		} else {
			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
		}

		if (task_ledgers.sfi_wait_times[class_id] < 0) {
			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
		}
	}

	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
#endif /* CONFIG_SCHED_SFI */

	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
	task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
#if CONFIG_PHYS_WRITE_ACCT
	task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
#endif /* CONFIG_PHYS_WRITE_ACCT */
	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");

#if CONFIG_MEMORYSTATUS
	task_ledgers.memorystatus_dirty_time = ledger_entry_add(t, "memorystatus_dirty_time", "physmem", "ns");
#endif /* CONFIG_MEMORYSTATUS */

	task_ledgers.swapins = ledger_entry_add_with_flags(t, "swapins", "physmem", "bytes",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

	/* Validate every registration in one pass: any negative index is fatal. */
	if ((task_ledgers.cpu_time < 0) ||
	    (task_ledgers.tkm_private < 0) ||
	    (task_ledgers.tkm_shared < 0) ||
	    (task_ledgers.phys_mem < 0) ||
	    (task_ledgers.wired_mem < 0) ||
	    (task_ledgers.conclave_mem < 0) ||
	    (task_ledgers.internal < 0) ||
	    (task_ledgers.external < 0) ||
	    (task_ledgers.reusable < 0) ||
	    (task_ledgers.iokit_mapped < 0) ||
	    (task_ledgers.alternate_accounting < 0) ||
	    (task_ledgers.alternate_accounting_compressed < 0) ||
	    (task_ledgers.page_table < 0) ||
	    (task_ledgers.phys_footprint < 0) ||
	    (task_ledgers.internal_compressed < 0) ||
	    (task_ledgers.purgeable_volatile < 0) ||
	    (task_ledgers.purgeable_nonvolatile < 0) ||
	    (task_ledgers.purgeable_volatile_compressed < 0) ||
	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
	    (task_ledgers.tagged_nofootprint < 0) ||
	    (task_ledgers.tagged_footprint < 0) ||
	    (task_ledgers.tagged_nofootprint_compressed < 0) ||
	    (task_ledgers.tagged_footprint_compressed < 0) ||
#if CONFIG_FREEZE
	    (task_ledgers.frozen_to_swap < 0) ||
#endif /* CONFIG_FREEZE */
	    (task_ledgers.network_volatile < 0) ||
	    (task_ledgers.network_nonvolatile < 0) ||
	    (task_ledgers.network_volatile_compressed < 0) ||
	    (task_ledgers.network_nonvolatile_compressed < 0) ||
	    (task_ledgers.media_nofootprint < 0) ||
	    (task_ledgers.media_footprint < 0) ||
	    (task_ledgers.media_nofootprint_compressed < 0) ||
	    (task_ledgers.media_footprint_compressed < 0) ||
	    (task_ledgers.graphics_nofootprint < 0) ||
	    (task_ledgers.graphics_footprint < 0) ||
	    (task_ledgers.graphics_nofootprint_compressed < 0) ||
	    (task_ledgers.graphics_footprint_compressed < 0) ||
	    (task_ledgers.neural_nofootprint < 0) ||
	    (task_ledgers.neural_footprint < 0) ||
	    (task_ledgers.neural_nofootprint_compressed < 0) ||
	    (task_ledgers.neural_footprint_compressed < 0) ||
	    (task_ledgers.platform_idle_wakeups < 0) ||
	    (task_ledgers.interrupt_wakeups < 0) ||
	    (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
	    (task_ledgers.physical_writes < 0) ||
	    (task_ledgers.logical_writes < 0) ||
	    (task_ledgers.logical_writes_to_external < 0) ||
#if CONFIG_PHYS_WRITE_ACCT
	    (task_ledgers.fs_metadata_writes < 0) ||
#endif /* CONFIG_PHYS_WRITE_ACCT */
#if CONFIG_MEMORYSTATUS
	    (task_ledgers.memorystatus_dirty_time < 0) ||
#endif /* CONFIG_MEMORYSTATUS */
	    (task_ledgers.energy_billed_to_me < 0) ||
	    (task_ledgers.energy_billed_to_others < 0) ||
	    (task_ledgers.swapins < 0)
	    ) {
		panic("couldn't create entries for task ledger template");
	}

	ledger_track_credit_only(t, task_ledgers.phys_footprint);
	ledger_track_credit_only(t, task_ledgers.internal);
	ledger_track_credit_only(t, task_ledgers.external);
	ledger_track_credit_only(t, task_ledgers.reusable);

	/* Track high-water marks, refreshed on a 60-unit interval. */
	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
	ledger_track_maximum(t, task_ledgers.phys_mem, 60);
	ledger_track_maximum(t, task_ledgers.internal, 60);
	ledger_track_maximum(t, task_ledgers.internal_compressed, 60);
	ledger_track_maximum(t, task_ledgers.reusable, 60);
	ledger_track_maximum(t, task_ledgers.external, 60);
#if MACH_ASSERT
	if (pmap_ledgers_panic) {
		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
		ledger_panic_on_negative(t, task_ledgers.conclave_mem);
		ledger_panic_on_negative(t, task_ledgers.page_table);
		ledger_panic_on_negative(t, task_ledgers.internal);
		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
#if CONFIG_PHYS_WRITE_ACCT
		ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
#endif /* CONFIG_PHYS_WRITE_ACCT */

		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_volatile);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.media_footprint);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
	}
#endif /* MACH_ASSERT */

#if CONFIG_MEMORYSTATUS
	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
#endif /* CONFIG_MEMORYSTATUS */

	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
	    task_wakeups_rate_exceeded, NULL, NULL);
	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);

#if CONFIG_SPTM || !XNU_MONITOR
	ledger_template_complete(t);
#else /* CONFIG_SPTM || !XNU_MONITOR */
	ledger_template_complete_secure_alloc(t);
#endif /* CONFIG_SPTM || !XNU_MONITOR */
	task_ledger_template = t;
}
1468 
1469 /* Create a task, but leave the task ports disabled */
1470 kern_return_t
task_create_internal(task_t parent_task,proc_ro_t proc_ro,coalition_t * parent_coalitions __unused,boolean_t inherit_memory,boolean_t is_64bit,boolean_t is_64bit_data,uint32_t t_flags,uint32_t t_flags_ro,uint32_t t_procflags,uint8_t t_returnwaitflags,task_t child_task)1471 task_create_internal(
1472 	task_t             parent_task,            /* Null-able */
1473 	proc_ro_t          proc_ro,
1474 	coalition_t        *parent_coalitions __unused,
1475 	boolean_t          inherit_memory,
1476 	boolean_t          is_64bit,
1477 	boolean_t          is_64bit_data,
1478 	uint32_t           t_flags,
1479 	uint32_t           t_flags_ro,
1480 	uint32_t           t_procflags,
1481 	uint8_t            t_returnwaitflags,
1482 	task_t             child_task)
1483 {
1484 	task_t                  new_task;
1485 	vm_shared_region_t      shared_region;
1486 	ledger_t                ledger = NULL;
1487 	struct task_ro_data     task_ro_data = {};
1488 	uint32_t                parent_t_flags_ro = 0;
1489 
1490 	new_task = child_task;
1491 
1492 	if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1493 		return KERN_RESOURCE_SHORTAGE;
1494 	}
1495 
1496 	/* allocate with active entries */
1497 	assert(task_ledger_template != NULL);
1498 	ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1499 	if (ledger == NULL) {
1500 		task_ref_count_fini(new_task);
1501 		return KERN_RESOURCE_SHORTAGE;
1502 	}
1503 
1504 	counter_alloc(&(new_task->faults));
1505 
1506 #if defined(HAS_APPLE_PAC)
1507 	const uint8_t disable_user_jop = inherit_memory ? parent_task->disable_user_jop : FALSE;
1508 	ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1509 	ml_task_set_jop_pid(new_task, parent_task, inherit_memory, disable_user_jop);
1510 	ml_task_set_disable_user_jop(new_task, disable_user_jop);
1511 #endif
1512 
1513 
1514 	new_task->ledger = ledger;
1515 
1516 	/* if inherit_memory is true, parent_task MUST not be NULL */
1517 	if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1518 #if CONFIG_DEFERRED_RECLAIM
1519 		if (parent_task->deferred_reclamation_metadata) {
1520 			/*
1521 			 * Prevent concurrent reclaims while we're forking the parent_task's map,
1522 			 * so that the child's map is in sync with the forked reclamation
1523 			 * metadata.
1524 			 */
1525 			vm_deferred_reclamation_buffer_lock(
1526 				parent_task->deferred_reclamation_metadata);
1527 		}
1528 #endif /* CONFIG_DEFERRED_RECLAIM */
1529 		new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1530 #if CONFIG_DEFERRED_RECLAIM
1531 		if (new_task->map != NULL &&
1532 		    parent_task->deferred_reclamation_metadata) {
1533 			new_task->deferred_reclamation_metadata =
1534 			    vm_deferred_reclamation_buffer_fork(new_task,
1535 			    parent_task->deferred_reclamation_metadata);
1536 		}
1537 #endif /* CONFIG_DEFERRED_RECLAIM */
1538 	} else {
1539 		unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1540 		pmap_t pmap = pmap_create_options(ledger, 0, pmap_flags);
1541 		vm_map_t new_map;
1542 
1543 		if (pmap == NULL) {
1544 			counter_free(&new_task->faults);
1545 			ledger_dereference(ledger);
1546 			task_ref_count_fini(new_task);
1547 			return KERN_RESOURCE_SHORTAGE;
1548 		}
1549 		new_map = vm_map_create_options(pmap,
1550 		    (vm_map_offset_t)(VM_MIN_ADDRESS),
1551 		    (vm_map_offset_t)(VM_MAX_ADDRESS),
1552 		    VM_MAP_CREATE_PAGEABLE);
1553 		if (parent_task) {
1554 			vm_map_inherit_limits(new_map, parent_task->map);
1555 		}
1556 		new_task->map = new_map;
1557 	}
1558 
1559 	if (new_task->map == NULL) {
1560 		counter_free(&new_task->faults);
1561 		ledger_dereference(ledger);
1562 		task_ref_count_fini(new_task);
1563 		return KERN_RESOURCE_SHORTAGE;
1564 	}
1565 
1566 #if defined(CONFIG_SCHED_MULTIQ)
1567 	new_task->sched_group = sched_group_create();
1568 #endif
1569 
1570 	lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1571 	queue_init(&new_task->threads);
1572 	new_task->suspend_count = 0;
1573 	new_task->thread_count = 0;
1574 	new_task->active_thread_count = 0;
1575 	new_task->user_stop_count = 0;
1576 	new_task->legacy_stop_count = 0;
1577 	new_task->active = TRUE;
1578 	new_task->halting = FALSE;
1579 	new_task->priv_flags = 0;
1580 	new_task->t_flags = t_flags;
1581 	task_ro_data.t_flags_ro = t_flags_ro;
1582 	new_task->t_procflags = t_procflags;
1583 	new_task->t_returnwaitflags = t_returnwaitflags;
1584 	new_task->returnwait_inheritor = current_thread();
1585 	new_task->importance = 0;
1586 	new_task->crashed_thread_id = 0;
1587 	new_task->watchports = NULL;
1588 	new_task->t_rr_ranges = NULL;
1589 
1590 	new_task->bank_context = NULL;
1591 
1592 	if (parent_task) {
1593 		parent_t_flags_ro = task_ro_flags_get(parent_task);
1594 	}
1595 
1596 	if (parent_task && inherit_memory) {
1597 #if __has_feature(ptrauth_calls)
1598 		/* Inherit the pac exception flags from parent if in fork */
1599 		task_ro_data.t_flags_ro |= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE |
1600 		    TFRO_PAC_EXC_FATAL));
1601 #endif /* __has_feature(ptrauth_calls) */
1602 		/* Inherit the hardened binary flags from parent if in fork */
1603 		task_ro_data.t_flags_ro |= parent_t_flags_ro & (TFRO_HARDENED | TFRO_PLATFORM | TFRO_JIT_EXC_FATAL);
1604 #if XNU_TARGET_OS_OSX
1605 		task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_MACH_HARDENING_OPT_OUT;
1606 #endif /* XNU_TARGET_OS_OSX */
1607 	}
1608 
1609 #ifdef MACH_BSD
1610 	new_task->corpse_info = NULL;
1611 #endif /* MACH_BSD */
1612 
1613 	/* kern_task not created by this function has unique id 0, start with 1 here. */
1614 	task_set_uniqueid(new_task);
1615 
1616 #if CONFIG_MACF
1617 	set_task_crash_label(new_task, NULL);
1618 
1619 	task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1620 	task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1621 #endif
1622 
1623 #if CONFIG_MEMORYSTATUS
1624 	if (max_task_footprint != 0) {
1625 		ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1626 	}
1627 #endif /* CONFIG_MEMORYSTATUS */
1628 
1629 	if (task_wakeups_monitor_rate != 0) {
1630 		uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1631 		int32_t  rate;        // Ignored because of WAKEMON_SET_DEFAULTS
1632 		task_wakeups_monitor_ctl(new_task, &flags, &rate);
1633 	}
1634 
1635 #if CONFIG_IO_ACCOUNTING
1636 	uint32_t flags = IOMON_ENABLE;
1637 	task_io_monitor_ctl(new_task, &flags);
1638 #endif /* CONFIG_IO_ACCOUNTING */
1639 
1640 	machine_task_init(new_task, parent_task, inherit_memory);
1641 
1642 	new_task->task_debug = NULL;
1643 
1644 #if DEVELOPMENT || DEBUG
1645 	new_task->task_unnested = FALSE;
1646 	new_task->task_disconnected_count = 0;
1647 #endif
1648 	queue_init(&new_task->semaphore_list);
1649 	new_task->semaphores_owned = 0;
1650 
1651 	new_task->vtimers = 0;
1652 
1653 	new_task->shared_region = NULL;
1654 
1655 	new_task->affinity_space = NULL;
1656 
1657 #if CONFIG_CPU_COUNTERS
1658 	new_task->t_kpc = 0;
1659 #endif /* CONFIG_CPU_COUNTERS */
1660 
1661 	new_task->pidsuspended = FALSE;
1662 	new_task->frozen = FALSE;
1663 	new_task->changing_freeze_state = FALSE;
1664 	new_task->rusage_cpu_flags = 0;
1665 	new_task->rusage_cpu_percentage = 0;
1666 	new_task->rusage_cpu_interval = 0;
1667 	new_task->rusage_cpu_deadline = 0;
1668 	new_task->rusage_cpu_callt = NULL;
1669 #if MACH_ASSERT
1670 	new_task->suspends_outstanding = 0;
1671 #endif
1672 	recount_task_init(&new_task->tk_recount);
1673 
1674 #if HYPERVISOR
1675 	new_task->hv_task_target = NULL;
1676 #endif /* HYPERVISOR */
1677 
1678 #if CONFIG_TASKWATCH
1679 	queue_init(&new_task->task_watchers);
1680 	new_task->num_taskwatchers  = 0;
1681 	new_task->watchapplying  = 0;
1682 #endif /* CONFIG_TASKWATCH */
1683 
1684 	new_task->mem_notify_reserved = 0;
1685 	new_task->memlimit_attrs_reserved = 0;
1686 
1687 	new_task->requested_policy = default_task_requested_policy;
1688 	new_task->effective_policy = default_task_effective_policy;
1689 
1690 	new_task->task_shared_region_slide = -1;
1691 
1692 	if (parent_task != NULL) {
1693 		task_ro_data.task_tokens.sec_token = *task_get_sec_token(parent_task);
1694 		task_ro_data.task_tokens.audit_token = *task_get_audit_token(parent_task);
1695 
1696 		/* only inherit the option bits, no effect until task_set_immovable_pinned() */
1697 		task_ro_data.task_control_port_options = task_get_control_port_options(parent_task);
1698 
1699 		task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_FILTER_MSG;
1700 #if CONFIG_MACF
1701 		if (!(t_flags & TF_CORPSE_FORK)) {
1702 			task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(parent_task);
1703 			task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(parent_task);
1704 		}
1705 #endif
1706 	} else {
1707 		task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1708 		task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1709 
1710 		task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1711 	}
1712 
1713 	/* must set before task_importance_init_from_parent: */
1714 	if (proc_ro != NULL) {
1715 		new_task->bsd_info_ro = proc_ro_ref_task(proc_ro, new_task, &task_ro_data);
1716 	} else {
1717 		new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, new_task, &task_ro_data);
1718 	}
1719 
1720 	ipc_task_init(new_task, parent_task);
1721 
1722 	task_importance_init_from_parent(new_task, parent_task);
1723 
1724 	new_task->corpse_vmobject_list = NULL;
1725 
1726 	if (parent_task != TASK_NULL) {
1727 		/* inherit the parent's shared region */
1728 		shared_region = vm_shared_region_get(parent_task);
1729 		if (shared_region != NULL) {
1730 			vm_shared_region_set(new_task, shared_region);
1731 		}
1732 
1733 #if __has_feature(ptrauth_calls)
1734 		/* use parent's shared_region_id */
1735 		char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1736 		if (shared_region_id != NULL) {
1737 			shared_region_key_alloc(shared_region_id, FALSE, 0);         /* get a reference */
1738 		}
1739 		task_set_shared_region_id(new_task, shared_region_id);
1740 #endif /* __has_feature(ptrauth_calls) */
1741 
1742 		if (task_has_64Bit_addr(parent_task)) {
1743 			task_set_64Bit_addr(new_task);
1744 		}
1745 
1746 		if (task_has_64Bit_data(parent_task)) {
1747 			task_set_64Bit_data(new_task);
1748 		}
1749 
1750 		new_task->all_image_info_addr = parent_task->all_image_info_addr;
1751 		new_task->all_image_info_size = parent_task->all_image_info_size;
1752 		new_task->mach_header_vm_address = 0;
1753 
1754 		if (inherit_memory && parent_task->affinity_space) {
1755 			task_affinity_create(parent_task, new_task);
1756 		}
1757 
1758 		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1759 
1760 		new_task->task_exc_guard = parent_task->task_exc_guard;
1761 		if (parent_task->t_flags & TF_NO_SMT) {
1762 			new_task->t_flags |= TF_NO_SMT;
1763 		}
1764 
1765 		if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1766 			new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1767 		}
1768 
1769 		if (parent_task->t_flags & TF_TECS) {
1770 			new_task->t_flags |= TF_TECS;
1771 		}
1772 
1773 #if defined(__x86_64__)
1774 		if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1775 			new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1776 		}
1777 #endif
1778 
1779 		new_task->priority = BASEPRI_DEFAULT;
1780 		new_task->max_priority = MAXPRI_USER;
1781 
1782 		task_policy_create(new_task, parent_task);
1783 	} else {
1784 #ifdef __LP64__
1785 		if (is_64bit) {
1786 			task_set_64Bit_addr(new_task);
1787 		}
1788 #endif
1789 
1790 		if (is_64bit_data) {
1791 			task_set_64Bit_data(new_task);
1792 		}
1793 
1794 		new_task->all_image_info_addr = (mach_vm_address_t)0;
1795 		new_task->all_image_info_size = (mach_vm_size_t)0;
1796 
1797 		new_task->pset_hint = PROCESSOR_SET_NULL;
1798 
1799 		new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1800 
1801 		if (new_task == kernel_task) {
1802 			new_task->priority = BASEPRI_KERNEL;
1803 			new_task->max_priority = MAXPRI_KERNEL;
1804 		} else {
1805 			new_task->priority = BASEPRI_DEFAULT;
1806 			new_task->max_priority = MAXPRI_USER;
1807 		}
1808 	}
1809 
1810 	bzero(new_task->coalition, sizeof(new_task->coalition));
1811 	for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1812 		queue_chain_init(new_task->task_coalition[i]);
1813 	}
1814 
1815 	/* Allocate I/O Statistics */
1816 	new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1817 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1818 
1819 	bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1820 	bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1821 
1822 	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1823 
1824 	counter_alloc(&(new_task->pageins));
1825 	counter_alloc(&(new_task->cow_faults));
1826 	counter_alloc(&(new_task->messages_sent));
1827 	counter_alloc(&(new_task->messages_received));
1828 
	/* Copy resource acc. info from Parent for Corpse Forked task. */
1830 	if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1831 		task_rollup_accounting_info(new_task, parent_task);
1832 		task_store_owned_vmobject_info(new_task, parent_task);
1833 	} else {
1834 		/* Initialize to zero for standard fork/spawn case */
1835 		new_task->total_runnable_time = 0;
1836 		new_task->syscalls_mach = 0;
1837 		new_task->syscalls_unix = 0;
1838 		new_task->c_switch = 0;
1839 		new_task->p_switch = 0;
1840 		new_task->ps_switch = 0;
1841 		new_task->decompressions = 0;
1842 		new_task->low_mem_notified_warn = 0;
1843 		new_task->low_mem_notified_critical = 0;
1844 		new_task->purged_memory_warn = 0;
1845 		new_task->purged_memory_critical = 0;
1846 		new_task->low_mem_privileged_listener = 0;
1847 		new_task->memlimit_is_active = 0;
1848 		new_task->memlimit_is_fatal = 0;
1849 		new_task->memlimit_active_exc_resource = 0;
1850 		new_task->memlimit_inactive_exc_resource = 0;
1851 		new_task->task_timer_wakeups_bin_1 = 0;
1852 		new_task->task_timer_wakeups_bin_2 = 0;
1853 		new_task->task_gpu_ns = 0;
1854 		new_task->task_writes_counters_internal.task_immediate_writes = 0;
1855 		new_task->task_writes_counters_internal.task_deferred_writes = 0;
1856 		new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1857 		new_task->task_writes_counters_internal.task_metadata_writes = 0;
1858 		new_task->task_writes_counters_external.task_immediate_writes = 0;
1859 		new_task->task_writes_counters_external.task_deferred_writes = 0;
1860 		new_task->task_writes_counters_external.task_invalidated_writes = 0;
1861 		new_task->task_writes_counters_external.task_metadata_writes = 0;
1862 #if CONFIG_PHYS_WRITE_ACCT
1863 		new_task->task_fs_metadata_writes = 0;
1864 #endif /* CONFIG_PHYS_WRITE_ACCT */
1865 	}
1866 
1867 
1868 	new_task->donates_own_pages = FALSE;
1869 #if CONFIG_COALITIONS
1870 	if (!(t_flags & TF_CORPSE_FORK)) {
1871 		/* TODO: there is no graceful failure path here... */
1872 		if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1873 			coalitions_adopt_task(parent_coalitions, new_task);
1874 			if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1875 				new_task->donates_own_pages = coalition_is_swappable(parent_coalitions[COALITION_TYPE_JETSAM]);
1876 			}
1877 		} else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1878 			/*
1879 			 * all tasks at least have a resource coalition, so
1880 			 * if the parent has one then inherit all coalitions
1881 			 * the parent is a part of
1882 			 */
1883 			coalitions_adopt_task(parent_task->coalition, new_task);
1884 			if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1885 				new_task->donates_own_pages = coalition_is_swappable(parent_task->coalition[COALITION_TYPE_JETSAM]);
1886 			}
1887 		} else {
1888 			/* TODO: assert that new_task will be PID 1 (launchd) */
1889 			coalitions_adopt_init_task(new_task);
1890 		}
1891 		/*
1892 		 * on exec, we need to transfer the coalition roles from the
1893 		 * parent task to the exec copy task.
1894 		 */
1895 		if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1896 			int coal_roles[COALITION_NUM_TYPES];
1897 			task_coalition_roles(parent_task, coal_roles);
1898 			(void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1899 		}
1900 	} else {
1901 		coalitions_adopt_corpse_task(new_task);
1902 	}
1903 
1904 	if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1905 		panic("created task is not a member of a resource coalition");
1906 	}
1907 	task_set_coalition_member(new_task);
1908 #endif /* CONFIG_COALITIONS */
1909 
1910 	new_task->dispatchqueue_offset = 0;
1911 	if (parent_task != NULL) {
1912 		new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1913 	}
1914 
1915 	new_task->task_can_transfer_memory_ownership = FALSE;
1916 	new_task->task_volatile_objects = 0;
1917 	new_task->task_nonvolatile_objects = 0;
1918 	new_task->task_objects_disowning = FALSE;
1919 	new_task->task_objects_disowned = FALSE;
1920 	new_task->task_owned_objects = 0;
1921 	queue_init(&new_task->task_objq);
1922 
1923 #if CONFIG_FREEZE
1924 	queue_init(&new_task->task_frozen_cseg_q);
1925 #endif /* CONFIG_FREEZE */
1926 
1927 	task_objq_lock_init(new_task);
1928 
1929 #if __arm64__
1930 	new_task->task_legacy_footprint = FALSE;
1931 	new_task->task_extra_footprint_limit = FALSE;
1932 	new_task->task_ios13extended_footprint_limit = FALSE;
1933 #endif /* __arm64__ */
1934 	new_task->task_region_footprint = FALSE;
1935 	new_task->task_has_crossed_thread_limit = FALSE;
1936 	new_task->task_thread_limit = 0;
1937 #if CONFIG_SECLUDED_MEMORY
1938 	new_task->task_can_use_secluded_mem = FALSE;
1939 	new_task->task_could_use_secluded_mem = FALSE;
1940 	new_task->task_could_also_use_secluded_mem = FALSE;
1941 	new_task->task_suppressed_secluded = FALSE;
1942 #endif /* CONFIG_SECLUDED_MEMORY */
1943 
1944 	/*
1945 	 * t_flags is set up above. But since we don't
1946 	 * support darkwake mode being set that way
1947 	 * currently, we clear it out here explicitly.
1948 	 */
1949 	new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1950 
1951 	queue_init(&new_task->io_user_clients);
1952 	new_task->loadTag = 0;
1953 
1954 	lck_mtx_lock(&tasks_threads_lock);
1955 	queue_enter(&tasks, new_task, task_t, tasks);
1956 	tasks_count++;
1957 	if (tasks_suspend_state) {
1958 		task_suspend_internal(new_task);
1959 	}
1960 	lck_mtx_unlock(&tasks_threads_lock);
1961 	task_ref_hold_proc_task_struct(new_task);
1962 
1963 	return KERN_SUCCESS;
1964 }
1965 
/*
 *	task_rollup_accounting_info
 *
 *	Roll up accounting stats from one task into another. Used to roll
 *	up stats for an exec copy task and for a corpse fork.
 *
 *	to_task:   task receiving the stats
 *	from_task: task the stats are taken from
 *
 *	NOTE(review): most fields are assigned (not accumulated), and the
 *	counter_add() calls assume to_task's counters start at zero — this
 *	looks intended for a freshly created to_task; confirm with callers.
 */
void
task_rollup_accounting_info(task_t to_task, task_t from_task)
{
	assert(from_task != to_task);

	/* CPU time accounting is copied wholesale. */
	recount_task_copy(&to_task->tk_recount, &from_task->tk_recount);
	to_task->total_runnable_time = from_task->total_runnable_time;
	/*
	 * Per-CPU counters cannot be assigned directly; read them with
	 * counter_load() and fold the value in with counter_add().
	 */
	counter_add(&to_task->faults, counter_load(&from_task->faults));
	counter_add(&to_task->pageins, counter_load(&from_task->pageins));
	counter_add(&to_task->cow_faults, counter_load(&from_task->cow_faults));
	counter_add(&to_task->messages_sent, counter_load(&from_task->messages_sent));
	counter_add(&to_task->messages_received, counter_load(&from_task->messages_received));
	to_task->decompressions = from_task->decompressions;
	to_task->syscalls_mach = from_task->syscalls_mach;
	to_task->syscalls_unix = from_task->syscalls_unix;
	to_task->c_switch = from_task->c_switch;
	to_task->p_switch = from_task->p_switch;
	to_task->ps_switch = from_task->ps_switch;
	to_task->extmod_statistics = from_task->extmod_statistics;
	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
	to_task->purged_memory_warn = from_task->purged_memory_warn;
	to_task->purged_memory_critical = from_task->purged_memory_critical;
	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
	/* Structure copy through the pointers: each task owns its own
	 * task_io_stats allocation. */
	*to_task->task_io_stats = *from_task->task_io_stats;
	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
	to_task->task_gpu_ns = from_task->task_gpu_ns;
	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
#if CONFIG_PHYS_WRITE_ACCT
	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */

#if CONFIG_MEMORYSTATUS
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.memorystatus_dirty_time);
#endif /* CONFIG_MEMORYSTATUS */

	/* Skip ledger roll up for memory accounting entries */
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
#if CONFIG_SCHED_SFI
	/* Roll up per-SFI-class wait times individually. */
	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
	}
#endif
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
}
2034 
/*
 *	task_deallocate_internal:
 *
 *	Drop a reference on a task.
 *	Don't call this directly.
 *
 *	refs is the remaining reference count reported by the refcount
 *	layer; full teardown happens only when it reaches zero.
 */
extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
void
task_deallocate_internal(
	task_t          task,
	os_ref_count_t  refs)
{
	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;

	if (task == TASK_NULL) {
		return;
	}

#if IMPORTANCE_INHERITANCE
	if (refs == 1) {
		/*
		 * If last ref potentially comes from the task's importance,
		 * disconnect it.  But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
		 */
		if (IIT_NULL != task->task_imp_base) {
			ipc_importance_disconnect_task(task);
		}
		return;
	}
#endif /* IMPORTANCE_INHERITANCE */

	if (refs > 0) {
		/* Not the last reference; nothing to tear down yet. */
		return;
	}

	/*
	 * The task should be dead at this point. Ensure other resources
	 * like threads, are gone before we trash the world.
	 */
	assert(queue_empty(&task->threads));
	assert(get_bsdtask_info(task) == NULL);
	assert(!is_active(task->itk_space));
	assert(!task->active);
	assert(task->active_thread_count == 0);
	assert(!task_get_game_mode(task));

	/* Unlink the task from the global terminated-tasks list. */
	lck_mtx_lock(&tasks_threads_lock);
	assert(terminated_tasks_count > 0);
	queue_remove(&terminated_tasks, task, task_t, tasks);
	terminated_tasks_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * remove the reference on bank context
	 */
	task_bank_reset(task);

	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup before ripping apart
	 *	the task.
	 */
	machine_task_terminate(task);

	ipc_task_terminate(task);

	/* let iokit know, stage 2 (see iokit_task_terminate) */
	iokit_task_terminate(task, 2);

	/* Unregister task from userspace coredumps on panic */
	kern_unregister_userspace_coredump(task);

	if (task->affinity_space) {
		task_affinity_deallocate(task);
	}

#if MACH_ASSERT
	/* The pmap's ledger, when set, must be the task's own ledger. */
	if (task->ledger != NULL &&
	    task->map != NULL &&
	    task->map->pmap != NULL &&
	    task->map->pmap->ledger != NULL) {
		assert(task->ledger == task->map->pmap->ledger);
	}
#endif /* MACH_ASSERT */

	/* All VM objects owned by the task must be disowned before it dies. */
	vm_owned_objects_disown(task);
	assert(task->task_objects_disowned);
	if (task->task_owned_objects != 0) {
		panic("task_deallocate(%p): "
		    "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
		    task,
		    task->task_volatile_objects,
		    task->task_nonvolatile_objects,
		    task->task_owned_objects);
	}

#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_deallocate(task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	vm_map_deallocate(task->map);
	if (task->is_large_corpse) {
		assert(large_corpse_count > 0);
		OSDecrementAtomic(&large_corpse_count);
		task->is_large_corpse = false;
	}
	is_release(task->itk_space);

	if (task->t_rr_ranges) {
		restartable_ranges_release(task->t_rr_ranges);
	}

	/*
	 * Snapshot wakeup ledger entries now, before the ledger is
	 * dereferenced further down.
	 */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    &interrupt_wakeups, &debit);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    &platform_idle_wakeups, &debit);

#if defined(CONFIG_SCHED_MULTIQ)
	sched_group_destroy(task->sched_group);
#endif

	/* Capture CPU time (and energy) totals before recount state is torn down. */
	struct recount_times_mach sum = { 0 };
	struct recount_times_mach p_only = { 0 };
	recount_task_times_perf_only(task, &sum, &p_only);
#if CONFIG_PERVASIVE_ENERGY
	uint64_t energy = recount_task_energy_nj(task);
#endif /* CONFIG_PERVASIVE_ENERGY */
	recount_task_deinit(&task->tk_recount);

	/* Accumulate statistics for dead tasks */
	lck_spin_lock(&dead_task_statistics_lock);
	dead_task_statistics.total_user_time += sum.rtm_user;
	dead_task_statistics.total_system_time += sum.rtm_system;

	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;

	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
	dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
	dead_task_statistics.total_pset_switches += task->ps_switch;
	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
#if CONFIG_PERVASIVE_ENERGY
	dead_task_statistics.task_energy += energy;
#endif /* CONFIG_PERVASIVE_ENERGY */

	lck_spin_unlock(&dead_task_statistics_lock);
	lck_mtx_destroy(&task->lock, &task_lck_grp);

	/* Fold the task's kalloc ledger activity into the global totals. */
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
	}
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
	}
	ledger_dereference(task->ledger);

	counter_free(&task->faults);
	counter_free(&task->pageins);
	counter_free(&task->cow_faults);
	counter_free(&task->messages_sent);
	counter_free(&task->messages_received);

#if CONFIG_COALITIONS
	task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */

	bzero(task->coalition, sizeof(task->coalition));

#if MACH_BSD
	/* clean up collected information since last reference to task is gone */
	if (task->corpse_info) {
		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
		task_crashinfo_destroy(task->corpse_info);
		task->corpse_info = NULL;
		kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
	}
#endif

#if CONFIG_MACF
	if (get_task_crash_label(task)) {
		mac_exc_free_label(get_task_crash_label(task));
		set_task_crash_label(task, NULL);
	}
#endif

	assert(queue_empty(&task->task_objq));
	task_objq_lock_destroy(task);

	if (task->corpse_vmobject_list) {
		kfree_data(task->corpse_vmobject_list,
		    (vm_size_t)task->corpse_vmobject_list_size);
	}

	/* Finally release the refcount state and the task struct itself. */
	task_ref_count_fini(task);
	proc_ro_erase_task(task->bsd_info_ro);
	task_release_proc_task_struct(task, task->bsd_info_ro);
}
2245 
2246 /*
2247  *	task_name_deallocate_mig:
2248  *
2249  *	Drop a reference on a task name.
2250  */
2251 void
task_name_deallocate_mig(task_name_t task_name)2252 task_name_deallocate_mig(
2253 	task_name_t             task_name)
2254 {
2255 	return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2256 }
2257 
2258 /*
2259  *	task_policy_set_deallocate_mig:
2260  *
2261  *	Drop a reference on a task type.
2262  */
2263 void
task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)2264 task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2265 {
2266 	return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2267 }
2268 
2269 /*
2270  *	task_policy_get_deallocate_mig:
2271  *
2272  *	Drop a reference on a task type.
2273  */
2274 void
task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)2275 task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2276 {
2277 	return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2278 }
2279 
2280 /*
2281  *	task_inspect_deallocate_mig:
2282  *
2283  *	Drop a task inspection reference.
2284  */
2285 void
task_inspect_deallocate_mig(task_inspect_t task_inspect)2286 task_inspect_deallocate_mig(
2287 	task_inspect_t          task_inspect)
2288 {
2289 	return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2290 }
2291 
2292 /*
2293  *	task_read_deallocate_mig:
2294  *
2295  *	Drop a reference on task read port.
2296  */
2297 void
task_read_deallocate_mig(task_read_t task_read)2298 task_read_deallocate_mig(
2299 	task_read_t          task_read)
2300 {
2301 	return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2302 }
2303 
2304 /*
2305  *	task_suspension_token_deallocate:
2306  *
2307  *	Drop a reference on a task suspension token.
2308  */
2309 void
task_suspension_token_deallocate(task_suspension_token_t token)2310 task_suspension_token_deallocate(
2311 	task_suspension_token_t         token)
2312 {
2313 	return task_deallocate((task_t)token);
2314 }
2315 
2316 void
task_suspension_token_deallocate_grp(task_suspension_token_t token,task_grp_t grp)2317 task_suspension_token_deallocate_grp(
2318 	task_suspension_token_t         token,
2319 	task_grp_t                      grp)
2320 {
2321 	return task_deallocate_grp((task_t)token, grp);
2322 }
2323 
/*
 * task_collect_crash_info:
 *
 * collect crash info from bsd and mach based data
 *
 * Allocates a fresh kcdata corpse-info buffer for the task and installs
 * it as task->corpse_info, destroying/freeing any previously installed
 * buffer. Returns KERN_NOT_SUPPORTED when corpses are disabled,
 * KERN_RESOURCE_SHORTAGE / KERN_FAILURE on allocation failures.
 *
 * NOTE(review): the parameter list uses #ifdef CONFIG_MACF while the
 * rest of the function uses #if CONFIG_MACF — equivalent only if
 * CONFIG_MACF is never defined to 0; confirm against the config system.
 */
kern_return_t
task_collect_crash_info(
	task_t task,
#ifdef CONFIG_MACF
	struct label *crash_label,
#endif
	int is_corpse_fork)
{
	kern_return_t kr = KERN_SUCCESS;

	kcdata_descriptor_t crash_data = NULL;
	/* Previously installed descriptor/buffer, released after the swap. */
	kcdata_descriptor_t crash_data_release = NULL;
	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
	mach_vm_offset_t crash_data_ptr = 0;
	void *crash_data_kernel = NULL;
	void *crash_data_kernel_release = NULL;
#if CONFIG_MACF
	struct label *label, *free_label;
#endif

	if (!corpses_enabled()) {
		return KERN_NOT_SUPPORTED;
	}

#if CONFIG_MACF
	/* Pre-allocate a label; whichever label ends up unused is freed at exit. */
	free_label = label = mac_exc_create_label(NULL);
#endif

	task_lock(task);

	assert(is_corpse_fork || get_bsdtask_info(task) != NULL);
	if (task->corpse_info == NULL && (is_corpse_fork || get_bsdtask_info(task) != NULL)) {
#if CONFIG_MACF
		/* Set the crash label, used by the exception delivery mac hook */
		free_label = get_task_crash_label(task);         // Most likely NULL.
		set_task_crash_label(task, label);
		mac_exc_update_task_crash_label(task, crash_label);
#endif
		/* Drop the task lock across the blocking allocation below. */
		task_unlock(task);

		crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
		    Z_WAITOK | Z_ZERO);
		if (crash_data_kernel == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto out_no_lock;
		}
		crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;

		/* Do not get a corpse ref for corpse fork */
		crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
		    is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
		    KCFLAG_USE_MEMCOPY);
		if (crash_data) {
			/*
			 * Re-take the lock to swap in the new descriptor;
			 * the old one (if any) is destroyed after unlocking.
			 */
			task_lock(task);
			crash_data_release = task->corpse_info;
			crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
			task->corpse_info = crash_data;

			task_unlock(task);
			kr = KERN_SUCCESS;
		} else {
			kfree_data(crash_data_kernel,
			    CORPSEINFO_ALLOCATION_SIZE);
			kr = KERN_FAILURE;
		}

		if (crash_data_release != NULL) {
			task_crashinfo_destroy(crash_data_release);
		}
		/* kfree_data() tolerates NULL, so no guard is needed here. */
		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
	} else {
		task_unlock(task);
	}

out_no_lock:
#if CONFIG_MACF
	if (free_label != NULL) {
		mac_exc_free_label(free_label);
	}
#endif
	return kr;
}
2411 
2412 /*
2413  * task_deliver_crash_notification:
2414  *
2415  * Makes outcall to registered host port for a corpse.
2416  */
kern_return_t
task_deliver_crash_notification(
	task_t corpse, /* corpse or corpse fork */
	thread_t thread,
	exception_type_t etype,
	mach_exception_subcode_t subcode)
{
	kcdata_descriptor_t crash_info = corpse->corpse_info;
	thread_t th_iter = NULL;
	kern_return_t kr = KERN_SUCCESS;
	wait_interrupt_t wsave;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	ipc_port_t corpse_port;

	/* Nothing to deliver if crash info was never collected. */
	if (crash_info == NULL) {
		return KERN_FAILURE;
	}

	assert(task_is_a_corpse(corpse));

	task_lock(corpse);

	/*
	 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
	 * Crash reporters should derive whether it's fatal from corpse blob.
	 */
	code[0] = etype;
	code[1] = subcode;

	/*
	 * Reset IPC state on every thread that was duplicated successfully
	 * (corpse_dup is set TRUE only when thread_dup2 failed during
	 * task_duplicate_map_and_threads).
	 */
	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
	{
		if (th_iter->corpse_dup == FALSE) {
			ipc_thread_reset(th_iter);
		}
	}
	task_unlock(corpse);

	/* Arm the no-sender notification for taskport */
	task_reference(corpse);
	corpse_port = convert_corpse_to_port_and_nsrequest(corpse);

	/* Deliver uninterruptibly so the send cannot be aborted mid-flight. */
	wsave = thread_interrupt_level(THREAD_UNINT);
	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
	if (kr != KERN_SUCCESS) {
		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(corpse));
	}

	(void)thread_interrupt_level(wsave);

	/*
	 * Drop the send right on corpse port, will fire the
	 * no-sender notification if exception deliver failed.
	 */
	ipc_port_release_send(corpse_port);
	return kr;
}
2473 
2474 /*
2475  *	task_terminate:
2476  *
2477  *	Terminate the specified task.  See comments on thread_terminate
2478  *	(kern/thread.c) about problems with terminating the "current task."
2479  */
2480 
2481 kern_return_t
task_terminate(task_t task)2482 task_terminate(
2483 	task_t          task)
2484 {
2485 	if (task == TASK_NULL) {
2486 		return KERN_INVALID_ARGUMENT;
2487 	}
2488 
2489 	if (get_bsdtask_info(task)) {
2490 		return KERN_FAILURE;
2491 	}
2492 
2493 	return task_terminate_internal(task);
2494 }
2495 
2496 #if MACH_ASSERT
2497 extern int proc_pid(struct proc *);
2498 extern void proc_name_kdp(struct proc *p, char *buf, int size);
2499 #endif /* MACH_ASSERT */
2500 
2501 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
2502 static void
task_partial_reap(task_t task,__unused int pid)2503 __unused task_partial_reap(task_t task, __unused int pid)
2504 {
2505 	unsigned int    reclaimed_resident = 0;
2506 	unsigned int    reclaimed_compressed = 0;
2507 	uint64_t        task_page_count;
2508 
2509 	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2510 
2511 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2512 	    pid, task_page_count, 0, 0, 0);
2513 
2514 	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2515 
2516 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2517 	    pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2518 }
2519 
2520 /*
2521  * task_mark_corpse:
2522  *
2523  * Mark the task as a corpse. Called by crashing thread.
2524  */
2525 kern_return_t
task_mark_corpse(task_t task)2526 task_mark_corpse(task_t task)
2527 {
2528 	kern_return_t kr = KERN_SUCCESS;
2529 	thread_t self_thread;
2530 	(void) self_thread;
2531 	wait_interrupt_t wsave;
2532 #if CONFIG_MACF
2533 	struct label *crash_label = NULL;
2534 #endif
2535 
2536 	assert(task != kernel_task);
2537 	assert(task == current_task());
2538 	assert(!task_is_a_corpse(task));
2539 
2540 #if CONFIG_MACF
2541 	crash_label = mac_exc_create_label_for_proc((struct proc*)get_bsdtask_info(task));
2542 #endif
2543 
2544 	kr = task_collect_crash_info(task,
2545 #if CONFIG_MACF
2546 	    crash_label,
2547 #endif
2548 	    FALSE);
2549 	if (kr != KERN_SUCCESS) {
2550 		goto out;
2551 	}
2552 
2553 	self_thread = current_thread();
2554 
2555 	wsave = thread_interrupt_level(THREAD_UNINT);
2556 	task_lock(task);
2557 
2558 	/*
2559 	 * Check if any other thread called task_terminate_internal
2560 	 * and made the task inactive before we could mark it for
2561 	 * corpse pending report. Bail out if the task is inactive.
2562 	 */
2563 	if (!task->active) {
2564 		kcdata_descriptor_t crash_data_release = task->corpse_info;;
2565 		void *crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);;
2566 
2567 		task->corpse_info = NULL;
2568 		task_unlock(task);
2569 
2570 		if (crash_data_release != NULL) {
2571 			task_crashinfo_destroy(crash_data_release);
2572 		}
2573 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2574 		return KERN_TERMINATED;
2575 	}
2576 
2577 	task_set_corpse_pending_report(task);
2578 	task_set_corpse(task);
2579 	task->crashed_thread_id = thread_tid(self_thread);
2580 
2581 	kr = task_start_halt_locked(task, TRUE);
2582 	assert(kr == KERN_SUCCESS);
2583 
2584 	task_set_uniqueid(task);
2585 
2586 	task_unlock(task);
2587 
2588 	/*
2589 	 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2590 	 * disable old ports here instead.
2591 	 *
2592 	 * The vm_map and ipc_space must exist until this function returns,
2593 	 * convert_port_to_{map,space}_with_flavor relies on this behavior.
2594 	 */
2595 	ipc_task_disable(task);
2596 
2597 	/* let iokit know 1 */
2598 	iokit_task_terminate(task, 1);
2599 
2600 	/* terminate the ipc space */
2601 	ipc_space_terminate(task->itk_space);
2602 
2603 	/* Add it to global corpse task list */
2604 	task_add_to_corpse_task_list(task);
2605 
2606 	thread_terminate_internal(self_thread);
2607 
2608 	(void) thread_interrupt_level(wsave);
2609 	assert(task->halting == TRUE);
2610 
2611 out:
2612 #if CONFIG_MACF
2613 	mac_exc_free_label(crash_label);
2614 #endif
2615 	return kr;
2616 }
2617 
2618 /*
2619  *	task_set_uniqueid
2620  *
2621  *	Set task uniqueid to systemwide unique 64 bit value
2622  */
2623 void
task_set_uniqueid(task_t task)2624 task_set_uniqueid(task_t task)
2625 {
2626 	task->task_uniqueid = OSIncrementAtomic64(&next_taskuniqueid);
2627 }
2628 
2629 /*
2630  *	task_clear_corpse
2631  *
2632  *	Clears the corpse pending bit on task.
2633  *	Removes inspection bit on the threads.
2634  */
void
task_clear_corpse(task_t task)
{
	thread_t th_iter = NULL;

	task_lock(task);
	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
	{
		/* Per-thread inspection state is protected by the thread mutex. */
		thread_mtx_lock(th_iter);
		th_iter->inspection = FALSE;
		ipc_thread_disable(th_iter);
		thread_mtx_unlock(th_iter);
	}

	thread_terminate_crashed_threads();
	/* remove the pending corpse report flag */
	task_clear_corpse_pending_report(task);

	task_unlock(task);
}
2655 
2656 /*
2657  *	task_port_no_senders
2658  *
2659  *	Called whenever the Mach port system detects no-senders on
2660  *	the task port of a corpse.
2661  *	Each notification that comes in should terminate the task (corpse).
2662  */
static void
task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	/* Resolve the port back to its corpse task; port must be a task-control kobject. */
	task_t task = ipc_kobject_get_locked(port, IKOT_TASK_CONTROL);

	assert(task != TASK_NULL);
	assert(task_is_a_corpse(task));

	/* Remove the task from global corpse task list */
	task_remove_from_corpse_task_list(task);

	/* Drop corpse state, detach the corpse-source map tag, then terminate. */
	task_clear_corpse(task);
	vm_map_unset_corpse_source(task->map);
	task_terminate_internal(task);
}
2678 
2679 /*
2680  *	task_port_with_flavor_no_senders
2681  *
2682  *	Called whenever the Mach port system detects no-senders on
2683  *	the task inspect or read port. These ports are allocated lazily and
2684  *	should be deallocated here when there are no senders remaining.
2685  */
static void
task_port_with_flavor_no_senders(
	ipc_port_t          port,
	mach_port_mscount_t mscount __unused)
{
	task_t task;
	mach_task_flavor_t flavor;
	ipc_kobject_type_t kotype;

	/* Quick check under the port lock: ignore if new senders already exist. */
	ip_mq_lock(port);
	if (port->ip_srights > 0) {
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
	task = ipc_kobject_get_locked(port, kotype);
	if (task != TASK_NULL) {
		/* Hold a task ref across the unlocked window below. */
		task_reference(task);
	}
	ip_mq_unlock(port);

	if (task == TASK_NULL) {
		/* The task is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	/* Map the kobject type back to the itk_task_ports[] slot it lives in. */
	if (kotype == IKOT_TASK_READ) {
		flavor = TASK_FLAVOR_READ;
	} else {
		flavor = TASK_FLAVOR_INSPECT;
	}

	/* Lock order: itk lock before port lock. */
	itk_lock(task);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_task_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	if (!ip_active(port) ||
	    task->itk_task_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		itk_unlock(task);
		task_deallocate(task);
		return;
	}

	/* We won the race: detach the lazily-allocated port and destroy it. */
	assert(task->itk_task_ports[flavor] == port);
	task->itk_task_ports[flavor] = IP_NULL;
	itk_unlock(task);

	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	task_deallocate(task);
}
2752 
2753 /*
2754  *	task_wait_till_threads_terminate_locked
2755  *
2756  *	Wait till all the threads in the task are terminated.
2757  *	Might release the task lock and re-acquire it.
2758  */
2759 void
task_wait_till_threads_terminate_locked(task_t task)2760 task_wait_till_threads_terminate_locked(task_t task)
2761 {
2762 	/* wait for all the threads in the task to terminate */
2763 	while (task->active_thread_count != 0) {
2764 		assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2765 		task_unlock(task);
2766 		thread_block(THREAD_CONTINUE_NULL);
2767 
2768 		task_lock(task);
2769 	}
2770 }
2771 
2772 /*
2773  *	task_duplicate_map_and_threads
2774  *
2775  *	Copy vmmap of source task.
2776  *	Copy active threads from source task to destination task.
2777  *	Source task would be suspended during the copy.
2778  */
/*
 * On KERN_SUCCESS: *thread_ret carries a ref the caller must drop, and
 * *udata_buffer (kalloc_data, *size bytes, *num_udata entries) is owned
 * by the caller.
 */
kern_return_t
task_duplicate_map_and_threads(
	task_t task,
	void *p,
	task_t new_task,
	thread_t *thread_ret,
	uint64_t **udata_buffer,
	int *size,
	int *num_udata,
	bool for_exception)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state, use the internal
	 * variant so that no user-space process can resume
	 * the task from under us
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		ktriage_record(thread_tid(self), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_CORPSE, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_CORPSE_FAIL_LIBGMALLOC), 0 /* arg */);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	bool is_large = false;
	if (memorystatus_allowed_vm_map_fork(task, &is_large)) {
		/* Setup new task's vmmap, switch from parent task's map to it COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		if (new_task->map) {
			new_task->is_large_corpse = is_large;
			vm_map_deallocate(oldmap);

			/* copy ledgers that impact the memory footprint */
			vm_map_copy_footprint_ledgers(task, new_task);

			/* Get all the udata pointers from kqueue */
			est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
			if (est_knotes > 0) {
				/* +32 slack: knotes may be added between the two calls. */
				buf_size = (est_knotes + 32) * sizeof(uint64_t);
				buffer = kalloc_data(buf_size, Z_WAITOK);
				num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
				if (num_knotes > est_knotes + 32) {
					num_knotes = est_knotes + 32;
				}
			}
		} else {
			/* vm_map_fork failed: undo the large-corpse accounting and restore the map. */
			if (is_large) {
				assert(large_corpse_count > 0);
				OSDecrementAtomic(&large_corpse_count);
			}
			new_task->map = oldmap;
#if DEVELOPMENT || DEBUG
			memorystatus_abort_vm_map_fork(task);
#endif
			task_resume_internal(task);
			return KERN_NO_SPACE;
		}
	} else if (!for_exception) {
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_NO_SPACE;
	}

	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		kfree_data(buffer, buf_size);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		if (array_count >= active_thread_count) {
			break;
		}

		/* Each captured thread holds a ref, dropped after duplication below. */
		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	for (i = 0; i < array_count; i++) {
		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of current thread in corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			/* Mark failed duplicates; they are skipped by ipc_thread_reset later. */
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(get_bsdthread_info(new_thread),
		    get_bsdthread_info(thread_array[i]));
		new_thread->thread_tag = thread_array[i]->thread_tag &
		    ~THREAD_TAG_USER_JOIN;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	/* Drop the refs taken while snapshotting the thread list. */
	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree_type(thread_t, active_thread_count, thread_array);

	if (kr == KERN_SUCCESS) {
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		/* Failure: release everything we would have handed to the caller. */
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		kfree_data(buffer, buf_size);
	}

	return kr;
}
2966 
2967 #if CONFIG_SECLUDED_MEMORY
2968 extern void task_set_can_use_secluded_mem_locked(
2969 	task_t          task,
2970 	boolean_t       can_use_secluded_mem);
2971 #endif /* CONFIG_SECLUDED_MEMORY */
2972 
2973 #if MACH_ASSERT
2974 int debug4k_panic_on_terminate = 0;
2975 #endif /* MACH_ASSERT */
kern_return_t
task_terminate_internal(
	task_t                  task)
{
	thread_t                        thread, self;
	task_t                          self_task;
	boolean_t                       interrupt_save;
	int                             pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = current_task();

	/*
	 *	Get the task locked and make sure that we are not racing
	 *	with someone else trying to terminate us.
	 */
	/* Lock both tasks in address order to avoid an ABBA deadlock. */
	if (task == self_task) {
		task_lock(task);
	} else if (task < self_task) {
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	/* Revoke all secluded-memory entitlement state before teardown. */
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 *	Task is already being terminated.
		 *	Just return an error. If we are dying, this will
		 *	just get us to our AST special handler and that
		 *	will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (task_corpse_pending_report(task)) {
		/*
		 *	Task is marked for reporting as corpse.
		 *	Just return an error. This will
		 *	just get us to our AST special handler and that
		 *	will get us to finish the path to death
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	/* Our own task's lock is no longer needed past this point. */
	if (self_task != task) {
		task_unlock(self_task);
	}

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 *	Indicate that we want all the threads to stop executing
	 *	at user space by holding the task (we would have held
	 *	each thread independently in thread_terminate_internal -
	 *	but this way we may be more likely to already find it
	 *	held there).  Mark the task inactive, and prevent
	 *	further task operations via the task port.
	 *
	 *	The vm_map and ipc_space must exist until this function returns,
	 *	convert_port_to_{map,space}_with_flavor relies on this behavior.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_EXCLAVES
	task_stop_conclave(task, false);
#endif /* CONFIG_EXCLAVES */

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 *	Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	void *bsd_info = get_bsdtask_info(task);
	if (bsd_info != NULL) {
		pid = proc_pid(bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

// PR-17045188: Revisit implementation
//        task_partial_reap(task, pid);

#if CONFIG_TASKWATCH
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_TASKWATCH */

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Clear the watchport boost on the task.
	 */
	task_remove_turnstile_watchports(task);

	/* let iokit know 1 */
	iokit_task_terminate(task, 1);

	/*
	 *	Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

#if CONFIG_DEFERRED_RECLAIM
	/*
	 * Remove this tasks reclaim buffer from global queues.
	 */
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_uninstall(task->deferred_reclamation_metadata);
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped.  To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explictly here.
	 */

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	void *proc = get_bsdtask_info(task);
	if (proc) {
		pid = proc_pid(proc);
		proc_name_kdp(proc, procname, sizeof(procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof(procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
		DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
		if (debug4k_panic_on_terminate) {
			panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
		}
	}
#endif /* MACH_ASSERT */

	vm_map_terminate(task->map);

	/* release our shared region */
	vm_shared_region_set(task, NULL);

#if __has_feature(ptrauth_calls)
	task_set_shared_region_id(task, NULL);
#endif /* __has_feature(ptrauth_calls) */

	/* Move the task from the live list to the terminated list. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if CONFIG_CPU_COUNTERS
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
		kpc_force_all_ctrs(task, 0);
	}
#endif /* CONFIG_CPU_COUNTERS */

#if CONFIG_COALITIONS
	/*
	 * Leave the coalition for corpse task or task that
	 * never had any active threads (e.g. fork, exec failure).
	 * For task with active threads, the task will be removed
	 * from coalition by last terminating thread.
	 */
	if (task->active_thread_count == 0) {
		coalitions_remove_task(task);
	}
#endif

#if CONFIG_FREEZE
	extern int      vm_compressor_available;
	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
		task_disown_frozen_csegs(task);
		assert(queue_empty(&task->task_frozen_cseg_q));
	}
#endif /* CONFIG_FREEZE */


	/*
	 * Get rid of the task active reference on itself.
	 */
	task_deallocate_grp(task, TASK_GRP_INTERNAL);

	return KERN_SUCCESS;
}
3244 
3245 void
tasks_system_suspend(boolean_t suspend)3246 tasks_system_suspend(boolean_t suspend)
3247 {
3248 	task_t task;
3249 
3250 	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
3251 	    (suspend ? DBG_FUNC_START : DBG_FUNC_END));
3252 
3253 	lck_mtx_lock(&tasks_threads_lock);
3254 	assert(tasks_suspend_state != suspend);
3255 	tasks_suspend_state = suspend;
3256 	queue_iterate(&tasks, task, task_t, tasks) {
3257 		if (task == kernel_task) {
3258 			continue;
3259 		}
3260 		suspend ? task_suspend_internal(task) : task_resume_internal(task);
3261 	}
3262 	lck_mtx_unlock(&tasks_threads_lock);
3263 }
3264 
3265 /*
3266  * task_start_halt:
3267  *
3268  *      Shut the current task down (except for the current thread) in
3269  *	preparation for dramatic changes to the task (probably exec).
3270  *	We hold the task and mark all other threads in the task for
3271  *	termination.
3272  */
3273 kern_return_t
task_start_halt(task_t task)3274 task_start_halt(task_t task)
3275 {
3276 	kern_return_t kr = KERN_SUCCESS;
3277 	task_lock(task);
3278 	kr = task_start_halt_locked(task, FALSE);
3279 	task_unlock(task);
3280 	return kr;
3281 }
3282 
/* Called and returns with the task lock held (may drop/retake it for exclaves). */
static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	/* Only the task's own thread may halt it, except for corpse forks. */
	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!should_mark_corpse &&
	    (task->halting || !task->active || !self->active)) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves. If should_mark_corpse is set, the corpse
		 * creation might have raced with exec, let the corpse
		 * creation continue, once the current thread reaches AST
		 * thread in exec will be woken up from task_complete_halt.
		 * Exec will fail cause the proc was marked for exit.
		 * Once the thread in exec reaches AST, it will call proc_exit
		 * and deliver the EXC_CORPSE_NOTIFY.
		 */
		return KERN_FAILURE;
	}

	/* Thread creation will fail after this point of no return. */
	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);

#if CONFIG_EXCLAVES
	if (should_mark_corpse) {
		/* Record per-thread exclave RPC state into the corpse info blob. */
		void *crash_info_ptr = task_get_corpseinfo(task);
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (crash_info_ptr != NULL && thread->th_exclaves_ipc_buffer != NULL) {
				struct thread_crash_exclaves_info info = { 0 };

				info.tcei_flags = kExclaveRPCActive;
				info.tcei_scid = thread->th_exclaves_scheduling_context_id;
				info.tcei_thread_id = thread->thread_id;

				kcdata_push_data(crash_info_ptr,
				    STACKSHOT_KCTYPE_KERN_EXCLAVES_CRASH_THREADINFO,
				    sizeof(struct thread_crash_exclaves_info), &info);
			}
		}

		/* task_stop_conclave may block; drop the task lock around it. */
		task_unlock(task);
		task_stop_conclave(task, true);
		task_lock(task);
	}
#endif /* CONFIG_EXCLAVES */

	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));
	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		/*
		 * Remove priority throttles for threads to terminate timely. This has
		 * to be done after task_hold_locked() traps all threads to AST, but before
		 * threads are marked inactive in thread_terminate_internal(). Takes thread
		 * mutex lock.
		 *
		 * We need task_is_a_corpse() check so that we don't accidently update policy
		 * for tasks that are doing posix_spawn().
		 *
		 * See: thread_policy_update_tasklocked().
		 */
		if (task_is_a_corpse(task)) {
			proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
		}

		if (should_mark_corpse) {
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		/* The calling thread keeps running; everyone else is terminated. */
		if (thread != self) {
			thread_terminate_internal(thread);
		}
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	task_release_locked(task);

	return KERN_SUCCESS;
}
3385 
3386 
/*
 * task_complete_halt:
 *
 *	Complete task halt by waiting for threads to terminate, then clean
 *	up task resources (VM, port namespace, etc...) and then let the
 *	current thread go in the (practically empty) task context.
 *
 *	Note: task->halting flag is not cleared in order to avoid creation
 *	of new thread in old exec'ed task.
 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 *	Wait for the other threads to get shut down.
	 *      When the last other thread is reaped, we'll be
	 *	woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		task_unlock(task);
	}

#if CONFIG_DEFERRED_RECLAIM
	/*
	 * Tear down the deferred-reclamation metadata before the VM map is
	 * destroyed below: uninstall first, then deallocate, then clear the
	 * pointer so nothing can reach the freed metadata.
	 */
	if (task->deferred_reclamation_metadata) {
		vm_deferred_reclamation_buffer_uninstall(
			task->deferred_reclamation_metadata);
		vm_deferred_reclamation_buffer_deallocate(
			task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup of task-level resources
	 *	associated with the current thread before
	 *	ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/* let iokit know 1 (first of a two-phase iokit notification) */
	iokit_task_terminate(task, 1);

	/*
	 *	Terminate the IPC space.  A long time ago,
	 *	this used to be ipc_space_clean() which would
	 *	keep the space active but hollow it.
	 *
	 *	We really do not need this semantics given
	 *	tasks die with exec now.
	 */
	ipc_space_terminate(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_terminate(task->map);

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	/* let iokit know 2 */
	iokit_task_terminate(task, 2);
}
3466 
3467 #ifdef CONFIG_TASK_SUSPEND_STATS
3468 
3469 static void
_task_mark_suspend_source(task_t task)3470 _task_mark_suspend_source(task_t task)
3471 {
3472 	int idx;
3473 	task_suspend_stats_t stats;
3474 	task_suspend_source_t source;
3475 	task_lock_assert_owned(task);
3476 	stats = &task->t_suspend_stats;
3477 
3478 	idx = stats->tss_count % TASK_SUSPEND_SOURCES_MAX;
3479 	source = &task->t_suspend_sources[idx];
3480 	bzero(source, sizeof(*source));
3481 
3482 	source->tss_time = mach_absolute_time();
3483 	source->tss_tid = current_thread()->thread_id;
3484 	source->tss_pid = task_pid(current_task());
3485 	task_best_name(current_task(), source->tss_procname, sizeof(source->tss_procname));
3486 
3487 	stats->tss_count++;
3488 }
3489 
3490 static inline void
_task_mark_suspend_start(task_t task)3491 _task_mark_suspend_start(task_t task)
3492 {
3493 	task_lock_assert_owned(task);
3494 	task->t_suspend_stats.tss_last_start = mach_absolute_time();
3495 }
3496 
3497 static inline void
_task_mark_suspend_end(task_t task)3498 _task_mark_suspend_end(task_t task)
3499 {
3500 	task_lock_assert_owned(task);
3501 	task->t_suspend_stats.tss_last_end = mach_absolute_time();
3502 	task->t_suspend_stats.tss_duration += (task->t_suspend_stats.tss_last_end -
3503 	    task->t_suspend_stats.tss_last_start);
3504 }
3505 
3506 static kern_return_t
_task_get_suspend_stats_locked(task_t task,task_suspend_stats_t stats)3507 _task_get_suspend_stats_locked(task_t task, task_suspend_stats_t stats)
3508 {
3509 	if (task == TASK_NULL || stats == NULL) {
3510 		return KERN_INVALID_ARGUMENT;
3511 	}
3512 	task_lock_assert_owned(task);
3513 	memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3514 	return KERN_SUCCESS;
3515 }
3516 
3517 static kern_return_t
_task_get_suspend_sources_locked(task_t task,task_suspend_source_t sources)3518 _task_get_suspend_sources_locked(task_t task, task_suspend_source_t sources)
3519 {
3520 	if (task == TASK_NULL || sources == NULL) {
3521 		return KERN_INVALID_ARGUMENT;
3522 	}
3523 	task_lock_assert_owned(task);
3524 	memcpy(sources, task->t_suspend_sources,
3525 	    sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3526 	return KERN_SUCCESS;
3527 }
3528 
3529 #endif /* CONFIG_TASK_SUSPEND_STATS */
3530 
3531 kern_return_t
task_get_suspend_stats(task_t task,task_suspend_stats_t stats)3532 task_get_suspend_stats(task_t task, task_suspend_stats_t stats)
3533 {
3534 #ifdef CONFIG_TASK_SUSPEND_STATS
3535 	kern_return_t kr;
3536 	if (task == TASK_NULL || stats == NULL) {
3537 		return KERN_INVALID_ARGUMENT;
3538 	}
3539 	task_lock(task);
3540 	kr = _task_get_suspend_stats_locked(task, stats);
3541 	task_unlock(task);
3542 	return kr;
3543 #else /* CONFIG_TASK_SUSPEND_STATS */
3544 	(void)task;
3545 	(void)stats;
3546 	return KERN_NOT_SUPPORTED;
3547 #endif
3548 }
3549 
/*
 * task_get_suspend_stats_kdp:
 * Debugger (KDP) variant of task_get_suspend_stats(): copies the stats
 * without taking the task lock.
 * NOTE(review): presumably safe only because KDP runs with the rest of
 * the system halted — confirm before calling from any other context.
 */
kern_return_t
task_get_suspend_stats_kdp(task_t task, task_suspend_stats_t stats)
{
#ifdef CONFIG_TASK_SUSPEND_STATS
	if (task == TASK_NULL || stats == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
	return KERN_SUCCESS;
#else /* CONFIG_TASK_SUSPEND_STATS */
#pragma unused(task, stats)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_TASK_SUSPEND_STATS */
}
3564 
3565 kern_return_t
task_get_suspend_sources(task_t task,task_suspend_source_array_t sources)3566 task_get_suspend_sources(task_t task, task_suspend_source_array_t sources)
3567 {
3568 #ifdef CONFIG_TASK_SUSPEND_STATS
3569 	kern_return_t kr;
3570 	if (task == TASK_NULL || sources == NULL) {
3571 		return KERN_INVALID_ARGUMENT;
3572 	}
3573 	task_lock(task);
3574 	kr = _task_get_suspend_sources_locked(task, sources);
3575 	task_unlock(task);
3576 	return kr;
3577 #else /* CONFIG_TASK_SUSPEND_STATS */
3578 	(void)task;
3579 	(void)sources;
3580 	return KERN_NOT_SUPPORTED;
3581 #endif
3582 }
3583 
/*
 * task_get_suspend_sources_kdp:
 * Debugger (KDP) variant of task_get_suspend_sources(): copies the
 * source log without taking the task lock.
 * NOTE(review): presumably safe only because KDP runs with the rest of
 * the system halted — confirm before calling from any other context.
 */
kern_return_t
task_get_suspend_sources_kdp(task_t task, task_suspend_source_array_t sources)
{
#ifdef CONFIG_TASK_SUSPEND_STATS
	if (task == TASK_NULL || sources == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	memcpy(sources, task->t_suspend_sources,
	    sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
	return KERN_SUCCESS;
#else /* CONFIG_TASK_SUSPEND_STATS */
#pragma unused(task, sources)
	return KERN_NOT_SUPPORTED;
#endif
}
3599 
/*
 *	task_hold_locked:
 *
 *	Suspend execution of the specified task.
 *	This is a recursive-style suspension of the task, a count of
 *	suspends is maintained.
 *
 *	CONDITIONS: the task is locked and active.
 */
void
task_hold_locked(
	task_t          task)
{
	thread_t        thread;
	void *bsd_info = get_bsdtask_info(task);

	assert(task->active);

	/* Recursive hold: only the first hold actually stops the threads. */
	if (task->suspend_count++ > 0) {
		return;
	}

	/* Notify the BSD workqueue layer of the suspension. */
	if (bsd_info) {
		workq_proc_suspended(bsd_info);
	}

	/*
	 *	Iterate through all the threads and hold them.
	 *	Each thread's own mutex protects its hold state.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_hold(thread);
		thread_mtx_unlock(thread);
	}

#ifdef CONFIG_TASK_SUSPEND_STATS
	/* Stamp the beginning of the suspension window for accounting. */
	_task_mark_suspend_start(task);
#endif
}
3639 
3640 /*
3641  *	task_hold_and_wait
3642  *
3643  *	Same as the internal routine above, except that is must lock
3644  *	and verify that the task is active.  This differs from task_suspend
3645  *	in that it places a kernel hold on the task rather than just a
3646  *	user-level hold.  This keeps users from over resuming and setting
3647  *	it running out from under the kernel.
3648  *
3649  *      CONDITIONS: the caller holds a reference on the task
3650  */
3651 kern_return_t
task_hold_and_wait(task_t task)3652 task_hold_and_wait(
3653 	task_t          task)
3654 {
3655 	if (task == TASK_NULL) {
3656 		return KERN_INVALID_ARGUMENT;
3657 	}
3658 
3659 	task_lock(task);
3660 	if (!task->active) {
3661 		task_unlock(task);
3662 		return KERN_FAILURE;
3663 	}
3664 
3665 #ifdef CONFIG_TASK_SUSPEND_STATS
3666 	_task_mark_suspend_source(task);
3667 #endif /* CONFIG_TASK_SUSPEND_STATS */
3668 
3669 	task_hold_locked(task);
3670 	task_wait_locked(task, FALSE);
3671 	task_unlock(task);
3672 
3673 	return KERN_SUCCESS;
3674 }
3675 
3676 /*
3677  *	task_wait_locked:
3678  *
3679  *	Wait for all threads in task to stop.
3680  *
3681  * Conditions:
3682  *	Called with task locked, active, and held.
3683  */
3684 void
task_wait_locked(task_t task,boolean_t until_not_runnable)3685 task_wait_locked(
3686 	task_t          task,
3687 	boolean_t               until_not_runnable)
3688 {
3689 	thread_t        thread, self;
3690 
3691 	assert(task->active);
3692 	assert(task->suspend_count > 0);
3693 
3694 	self = current_thread();
3695 
3696 	/*
3697 	 *	Iterate through all the threads and wait for them to
3698 	 *	stop.  Do not wait for the current thread if it is within
3699 	 *	the task.
3700 	 */
3701 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3702 		if (thread != self) {
3703 			thread_wait(thread, until_not_runnable);
3704 		}
3705 	}
3706 }
3707 
3708 boolean_t
task_is_app_suspended(task_t task)3709 task_is_app_suspended(task_t task)
3710 {
3711 	return task->pidsuspended;
3712 }
3713 
3714 /*
3715  *	task_release_locked:
3716  *
3717  *	Release a kernel hold on a task.
3718  *
3719  *      CONDITIONS: the task is locked and active
3720  */
3721 void
task_release_locked(task_t task)3722 task_release_locked(
3723 	task_t          task)
3724 {
3725 	thread_t        thread;
3726 	void *bsd_info = get_bsdtask_info(task);
3727 
3728 	assert(task->active);
3729 	assert(task->suspend_count > 0);
3730 
3731 	if (--task->suspend_count > 0) {
3732 		return;
3733 	}
3734 
3735 	if (bsd_info) {
3736 		workq_proc_resumed(bsd_info);
3737 	}
3738 
3739 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3740 		thread_mtx_lock(thread);
3741 		thread_release(thread);
3742 		thread_mtx_unlock(thread);
3743 	}
3744 
3745 #if CONFIG_TASK_SUSPEND_STATS
3746 	_task_mark_suspend_end(task);
3747 #endif
3748 }
3749 
3750 /*
3751  *	task_release:
3752  *
3753  *	Same as the internal routine above, except that it must lock
3754  *	and verify that the task is active.
3755  *
3756  *      CONDITIONS: The caller holds a reference to the task
3757  */
3758 kern_return_t
task_release(task_t task)3759 task_release(
3760 	task_t          task)
3761 {
3762 	if (task == TASK_NULL) {
3763 		return KERN_INVALID_ARGUMENT;
3764 	}
3765 
3766 	task_lock(task);
3767 
3768 	if (!task->active) {
3769 		task_unlock(task);
3770 
3771 		return KERN_FAILURE;
3772 	}
3773 
3774 	task_release_locked(task);
3775 	task_unlock(task);
3776 
3777 	return KERN_SUCCESS;
3778 }
3779 
/*
 * task_threads_internal:
 *
 * Build an array holding a reference to every thread in `task`, convert
 * the refs to ports of the requested flavor, and return the array to the
 * (MIG) caller.  Returns KERN_FAILURE if the task is/becomes inactive and
 * KERN_RESOURCE_SHORTAGE on allocation failure.
 */
static kern_return_t
task_threads_internal(
	task_t                  task,
	thread_act_array_t     *threads_out,
	mach_msg_type_number_t *countp,
	mach_thread_flavor_t    flavor)
{
	mach_msg_type_number_t  actual, count, count_needed;
	thread_act_array_t      thread_list;
	thread_t                thread;
	unsigned int            i;

	count = 0;
	thread_list = NULL;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(flavor <= THREAD_FLAVOR_INSPECT);

	/*
	 * Allocate-and-retry loop: the thread count can change while the
	 * task is unlocked for allocation, so keep re-checking under the
	 * task lock until the buffer is large enough.
	 */
	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			mach_port_array_free(thread_list, count);
			return KERN_FAILURE;
		}

		count_needed = actual = task->thread_count;
		if (count_needed <= count) {
			break;  /* buffer fits; exit with the task still locked */
		}

		/* unlock the task and allocate more memory */
		task_unlock(task);

		mach_port_array_free(thread_list, count);
		count = count_needed;
		thread_list = mach_port_array_alloc(count, Z_WAITOK);

		if (thread_list == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* Take a reference on each thread while the list is stable. */
	i = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		assert(i < actual);
		thread_reference(thread);
		((thread_t *)thread_list)[i++] = thread;
	}

	count_needed = actual;

	/* can unlock task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return null pointer and deallocate memory */

		mach_port_array_free(thread_list, count);

		*threads_out = NULL;
		*countp = 0;
	} else {
		/* if we allocated too much, must copy */
		if (count_needed < count) {
			mach_port_array_t newaddr;

			newaddr = mach_port_array_alloc(count_needed, Z_WAITOK);
			if (newaddr == NULL) {
				/* undo the per-thread references taken above */
				for (i = 0; i < actual; ++i) {
					thread_deallocate(((thread_t *)thread_list)[i]);
				}
				mach_port_array_free(thread_list, count);
				return KERN_RESOURCE_SHORTAGE;
			}

			bcopy(thread_list, newaddr, count_needed * sizeof(thread_t));
			mach_port_array_free(thread_list, count);
			thread_list = newaddr;
		}

		/* do the conversion that Mig should handle */
		convert_thread_array_to_ports(thread_list, actual, flavor);

		*threads_out = thread_list;
		*countp = actual;
	}

	return KERN_SUCCESS;
}
3874 
3875 
3876 kern_return_t
task_threads_from_user(mach_port_t port,thread_act_array_t * threads_out,mach_msg_type_number_t * count)3877 task_threads_from_user(
3878 	mach_port_t                 port,
3879 	thread_act_array_t         *threads_out,
3880 	mach_msg_type_number_t     *count)
3881 {
3882 	ipc_kobject_type_t kotype;
3883 	kern_return_t kr;
3884 
3885 	task_t task = convert_port_to_task_inspect_no_eval(port);
3886 
3887 	if (task == TASK_NULL) {
3888 		return KERN_INVALID_ARGUMENT;
3889 	}
3890 
3891 	kotype = ip_kotype(port);
3892 
3893 	switch (kotype) {
3894 	case IKOT_TASK_CONTROL:
3895 		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3896 		break;
3897 	case IKOT_TASK_READ:
3898 		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
3899 		break;
3900 	case IKOT_TASK_INSPECT:
3901 		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
3902 		break;
3903 	default:
3904 		panic("strange kobject type");
3905 		break;
3906 	}
3907 
3908 	task_deallocate(task);
3909 	return kr;
3910 }
3911 
/*
 * Hold modes for place_task_hold()/release_task_hold(): who placed the
 * hold determines how it is accounted and how it must be released.
 */
#define TASK_HOLD_NORMAL        0       /* token-based hold (task_suspend_internal) */
#define TASK_HOLD_PIDSUSPEND    1       /* hold placed by task_pidsuspend() */
#define TASK_HOLD_LEGACY        2       /* old-style task_suspend() hold */
#define TASK_HOLD_LEGACY_ALL    3       /* release every outstanding legacy hold */

/*
 * place_task_hold:
 *
 * Account one user-level stop against `task`; on the first stop, place
 * the single kernel-level hold and wait for the threads to stop running
 * user code.  Caller holds the task lock.
 */
static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
	    task_pid(task),
	    task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
	    task->user_stop_count, task->user_stop_count + 1);

#if MACH_ASSERT
	/* best-effort leak tracking of suspensions placed by this task */
	current_task()->suspends_outstanding++;
#endif

	/* legacy (task_suspend) holds are counted separately as well */
	if (mode == TASK_HOLD_LEGACY) {
		task->legacy_stop_count++;
	}

#ifdef CONFIG_TASK_SUSPEND_STATS
	_task_mark_suspend_source(task);
#endif /* CONFIG_TASK_SUSPEND_STATS */

	if (task->user_stop_count++ > 0) {
		/*
		 *	If the stop count was positive, the task is
		 *	already stopped and we can exit.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold).  We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return KERN_SUCCESS;
}
3967 
/*
 * release_task_hold:
 *
 * Undo one user-level stop placed by place_task_hold() — or, for
 * TASK_HOLD_LEGACY_ALL, every outstanding legacy stop.  The kernel-level
 * hold is dropped once the last user stop goes away.
 * Caller holds the task lock.
 */
static kern_return_t
release_task_hold(
	task_t          task,
	int                     mode)
{
	boolean_t release = FALSE;

	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	if (mode == TASK_HOLD_PIDSUSPEND) {
		if (task->pidsuspended == FALSE) {
			return KERN_FAILURE;
		}
		task->pidsuspended = FALSE;
	}

	/* A pidsuspend still in effect reserves one stop-count unit. */
	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
		    task->user_stop_count, mode, task->legacy_stop_count);

#if MACH_ASSERT
		/*
		 * This is obviously not robust; if we suspend one task and then resume a different one,
		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
		 * or buggy suspender.
		 */
		current_task()->suspends_outstanding--;
#endif

		if (mode == TASK_HOLD_LEGACY_ALL) {
			/* wipe all legacy stops; any excess stops remain */
			if (task->legacy_stop_count >= task->user_stop_count) {
				task->user_stop_count = 0;
				release = TRUE;
			} else {
				task->user_stop_count -= task->legacy_stop_count;
			}
			task->legacy_stop_count = 0;
		} else {
			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
				task->legacy_stop_count--;
			}
			if (--task->user_stop_count == 0) {
				release = TRUE;
			}
		}
	} else {
		return KERN_FAILURE;
	}

	/*
	 *	Release the task if necessary.
	 */
	if (release) {
		task_release_locked(task);
	}

	return KERN_SUCCESS;
}
4035 
4036 boolean_t
get_task_suspended(task_t task)4037 get_task_suspended(task_t task)
4038 {
4039 	return 0 != task->user_stop_count;
4040 }
4041 
/*
 *	task_suspend:
 *
 *	Implement an (old-fashioned) user-level suspension on a task.
 *
 *	Because the user isn't expecting to have to manage a suspension
 *	token, we'll track it for him in the kernel in the form of a naked
 *	send right to the task's resume port.  All such send rights
 *	account for a single suspension against the task (unlike task_suspend2()
 *	where each caller gets a unique suspension count represented by a
 *	unique send-once right).
 *
 * Conditions:
 *      The caller holds a reference to the task
 */
kern_return_t
task_suspend(
	task_t          task)
{
	kern_return_t                   kr;
	mach_port_t                     port;
	mach_port_name_t                name;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * place a legacy hold on the task.
	 */
	task_lock(task);
	kr = place_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Claim a send right on the task resume port, and request a no-senders
	 * notification on that port (if none outstanding).
	 */
	itk_lock(task);
	port = task->itk_resume;
	if (port == IP_NULL) {
		/* first suspender: lazily create the resume port */
		port = ipc_kobject_alloc_port(task, IKOT_TASK_RESUME,
		    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
		task->itk_resume = port;
	} else {
		(void)ipc_kobject_make_send_nsrequest(port, task, IKOT_TASK_RESUME);
	}
	itk_unlock(task);

	/*
	 * Copyout the send right into the calling task's IPC space.  It won't know it is there,
	 * but we'll look it up when calling a traditional resume.  Any IPC operations that
	 * deallocate the send right will auto-release the suspension.
	 */
	if (IP_VALID(port)) {
		kr = ipc_object_copyout(current_space(), ip_to_object(port),
		    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
		    NULL, NULL, &name);
	} else {
		kr = KERN_SUCCESS;
	}
	if (kr != KERN_SUCCESS) {
		/* note: the hold placed above is not rolled back here */
		printf("warning: %s(%d) failed to copyout suspension "
		    "token for pid %d with error: %d\n",
		    proc_name_address(get_bsdtask_info(current_task())),
		    proc_pid(get_bsdtask_info(current_task())),
		    task_pid(task), kr);
	}

	return kr;
}
4117 
/*
 *	task_resume:
 *		Release a user hold on a task.
 *
 * Conditions:
 *		The caller holds a reference to the task
 */
kern_return_t
task_resume(
	task_t  task)
{
	kern_return_t    kr;
	mach_port_name_t resume_port_name;
	ipc_entry_t              resume_port_entry;
	ipc_space_t              space = current_task()->itk_space;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/* release a legacy task hold */
	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	/*
	 * Look for the hidden suspension token (a send right on itk_resume)
	 * in the caller's IPC space.  Lock order: itk lock, then space lock.
	 */
	itk_lock(task); /* for itk_resume */
	is_write_lock(space); /* spin lock */
	if (is_active(space) && IP_VALID(task->itk_resume) &&
	    ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
		/*
		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
		 * we are holding one less legacy hold on the task from this caller.  If the release failed,
		 * go ahead and drop all the rights, as someone either already released our holds or the task
		 * is gone.
		 */
		itk_unlock(task);
		if (kr == KERN_SUCCESS) {
			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
		} else {
			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
		}
		/* space unlocked */
	} else {
		/* no token found: resume without a matching task_suspend() */
		itk_unlock(task);
		is_write_unlock(space);
		if (kr == KERN_SUCCESS) {
			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
			    proc_name_address(get_bsdtask_info(current_task())), proc_pid(get_bsdtask_info(current_task())),
			    task_pid(task));
		}
	}

	return kr;
}
4172 
4173 /*
4174  * Suspend a task that is already protected by a held lock.
4175  * Making/holding a token/reference/port is the caller's responsibility.
4176  */
4177 kern_return_t
task_suspend_internal_locked(task_t task)4178 task_suspend_internal_locked(task_t task)
4179 {
4180 	if (task == TASK_NULL || task == kernel_task) {
4181 		return KERN_INVALID_ARGUMENT;
4182 	}
4183 
4184 	return place_task_hold(task, TASK_HOLD_NORMAL);
4185 }
4186 
4187 /*
4188  * Suspend a task.
4189  * Making/holding a token/reference/port is the caller's responsibility.
4190  */
4191 kern_return_t
task_suspend_internal(task_t task)4192 task_suspend_internal(task_t task)
4193 {
4194 	kern_return_t    kr;
4195 
4196 	if (task == TASK_NULL || task == kernel_task) {
4197 		return KERN_INVALID_ARGUMENT;
4198 	}
4199 
4200 	task_lock(task);
4201 	kr = task_suspend_internal_locked(task);
4202 	task_unlock(task);
4203 	return kr;
4204 }
4205 
4206 /*
4207  * Suspend the target task, and return a suspension token. The token
4208  * represents a reference on the suspended task.
4209  */
4210 static kern_return_t
task_suspend2_grp(task_t task,task_suspension_token_t * suspend_token,task_grp_t grp)4211 task_suspend2_grp(
4212 	task_t                  task,
4213 	task_suspension_token_t *suspend_token,
4214 	task_grp_t              grp)
4215 {
4216 	kern_return_t    kr;
4217 
4218 	kr = task_suspend_internal(task);
4219 	if (kr != KERN_SUCCESS) {
4220 		*suspend_token = TASK_NULL;
4221 		return kr;
4222 	}
4223 
4224 	/*
4225 	 * Take a reference on the target task and return that to the caller
4226 	 * as a "suspension token," which can be converted into an SO right to
4227 	 * the now-suspended task's resume port.
4228 	 */
4229 	task_reference_grp(task, grp);
4230 	*suspend_token = task;
4231 
4232 	return KERN_SUCCESS;
4233 }
4234 
4235 kern_return_t
task_suspend2_mig(task_t task,task_suspension_token_t * suspend_token)4236 task_suspend2_mig(
4237 	task_t                  task,
4238 	task_suspension_token_t *suspend_token)
4239 {
4240 	return task_suspend2_grp(task, suspend_token, TASK_GRP_MIG);
4241 }
4242 
4243 kern_return_t
task_suspend2_external(task_t task,task_suspension_token_t * suspend_token)4244 task_suspend2_external(
4245 	task_t                  task,
4246 	task_suspension_token_t *suspend_token)
4247 {
4248 	return task_suspend2_grp(task, suspend_token, TASK_GRP_EXTERNAL);
4249 }
4250 
4251 /*
4252  * Resume a task that is already protected by a held lock.
4253  * (reference/token/port management is caller's responsibility).
4254  */
4255 kern_return_t
task_resume_internal_locked(task_suspension_token_t task)4256 task_resume_internal_locked(
4257 	task_suspension_token_t         task)
4258 {
4259 	if (task == TASK_NULL || task == kernel_task) {
4260 		return KERN_INVALID_ARGUMENT;
4261 	}
4262 
4263 	return release_task_hold(task, TASK_HOLD_NORMAL);
4264 }
4265 
4266 /*
4267  * Resume a task.
4268  * (reference/token/port management is caller's responsibility).
4269  */
4270 kern_return_t
task_resume_internal(task_suspension_token_t task)4271 task_resume_internal(
4272 	task_suspension_token_t         task)
4273 {
4274 	kern_return_t kr;
4275 
4276 	if (task == TASK_NULL || task == kernel_task) {
4277 		return KERN_INVALID_ARGUMENT;
4278 	}
4279 
4280 	task_lock(task);
4281 	kr = task_resume_internal_locked(task);
4282 	task_unlock(task);
4283 	return kr;
4284 }
4285 
4286 /*
4287  * Resume the task using a suspension token. Consumes the token's ref.
4288  */
4289 static kern_return_t
task_resume2_grp(task_suspension_token_t task,task_grp_t grp)4290 task_resume2_grp(
4291 	task_suspension_token_t         task,
4292 	task_grp_t                      grp)
4293 {
4294 	kern_return_t kr;
4295 
4296 	kr = task_resume_internal(task);
4297 	task_suspension_token_deallocate_grp(task, grp);
4298 
4299 	return kr;
4300 }
4301 
4302 kern_return_t
task_resume2_mig(task_suspension_token_t task)4303 task_resume2_mig(
4304 	task_suspension_token_t         task)
4305 {
4306 	return task_resume2_grp(task, TASK_GRP_MIG);
4307 }
4308 
4309 kern_return_t
task_resume2_external(task_suspension_token_t task)4310 task_resume2_external(
4311 	task_suspension_token_t         task)
4312 {
4313 	return task_resume2_grp(task, TASK_GRP_EXTERNAL);
4314 }
4315 
/*
 * No-senders notification on a task's resume port: every legacy
 * task_suspend() send right has been released or destroyed.
 * NOTE(review): KERN_FAILURE from ipc_kobject_nsrequest appears to mean
 * the notification could not be re-armed (no senders remain), in which
 * case all remaining legacy holds are dropped — confirm against the
 * ipc_kobject implementation.
 */
static void
task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	task_t task = convert_port_to_task_suspension_token(port);
	kern_return_t kr;

	if (task == TASK_NULL) {
		return;
	}

	if (task == kernel_task) {
		task_suspension_token_deallocate(task);
		return;
	}

	task_lock(task);

	kr = ipc_kobject_nsrequest(port, mscount, NULL);
	if (kr == KERN_FAILURE) {
		/* release all the [remaining] outstanding legacy holds */
		release_task_hold(task, TASK_HOLD_LEGACY_ALL);
	}

	task_unlock(task);

	task_suspension_token_deallocate(task);         /* drop token reference */
}
4343 
4344 /*
4345  * Fires when a send once made
4346  * by convert_task_suspension_token_to_port() dies.
4347  */
4348 void
task_suspension_send_once(ipc_port_t port)4349 task_suspension_send_once(ipc_port_t port)
4350 {
4351 	task_t task = convert_port_to_task_suspension_token(port);
4352 
4353 	if (task == TASK_NULL || task == kernel_task) {
4354 		return; /* nothing to do */
4355 	}
4356 
4357 	/* release the hold held by this specific send-once right */
4358 	task_lock(task);
4359 	release_task_hold(task, TASK_HOLD_NORMAL);
4360 	task_unlock(task);
4361 
4362 	task_suspension_token_deallocate(task);         /* drop token reference */
4363 }
4364 
4365 static kern_return_t
task_pidsuspend_locked(task_t task)4366 task_pidsuspend_locked(task_t task)
4367 {
4368 	kern_return_t kr;
4369 
4370 	if (task->pidsuspended) {
4371 		kr = KERN_FAILURE;
4372 		goto out;
4373 	}
4374 
4375 	task->pidsuspended = TRUE;
4376 
4377 	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4378 	if (kr != KERN_SUCCESS) {
4379 		task->pidsuspended = FALSE;
4380 	}
4381 out:
4382 	return kr;
4383 }
4384 
4385 
/*
 *	task_pidsuspend:
 *
 *	Suspends a task by placing a hold on its threads.
 *
 * Conditions:
 *      The caller holds a reference to the task
 */
kern_return_t
task_pidsuspend(
	task_t          task)
{
	kern_return_t    kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	kr = task_pidsuspend_locked(task);

	task_unlock(task);

	/*
	 * NOTE(review): message_app_suspended is read after the task lock is
	 * dropped — presumably a stable per-task configuration flag; confirm.
	 */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

	return kr;
}
4416 
/*
 *	task_pidresume:
 *		Resumes a previously suspended task.
 *
 * Conditions:
 *		The caller holds a reference to the task
 */
kern_return_t
task_pidresume(
	task_t  task)
{
	kern_return_t    kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

#if CONFIG_FREEZE

	/*
	 * Serialize against an in-progress freeze/thaw: wait until no other
	 * thread is changing the freeze state, then claim it ourselves.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	task->changing_freeze_state = TRUE;
#endif

	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);

	task_unlock(task);

	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

#if CONFIG_FREEZE

	task_lock(task);

	/* a successful resume also thaws the task */
	if (kr == KERN_SUCCESS) {
		task->frozen = FALSE;
	}
	/* release our claim on the freeze state and wake any waiters */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);
#endif

	return kr;
}
4471 
/* os_refcnt group used for task_watchports reference accounting. */
os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4473 
4474 /*
4475  *	task_add_turnstile_watchports:
4476  *		Setup watchports to boost the main thread of the task.
4477  *
4478  *	Arguments:
4479  *		task: task being spawned
4480  *		thread: main thread of task
4481  *		portwatch_ports: array of watchports
4482  *		portwatch_count: number of watchports
4483  *
4484  *	Conditions:
4485  *		Nothing locked.
4486  */
void
task_add_turnstile_watchports(
	task_t          task,
	thread_t        thread,
	ipc_port_t      *portwatch_ports,
	uint32_t        portwatch_count)
{
	struct task_watchports *watchports = NULL;
	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
	os_ref_count_t refs;

	/* Check if the task has terminated */
	if (!task->active) {
		return;
	}

	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);

	/* Allocated with one initial reference; see the refs == 0 handling below. */
	watchports = task_watchports_alloc_init(task, thread, portwatch_count);

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Setup watchports to boost the main thread */
	refs = task_add_turnstile_watchports_locked(task,
	    watchports, previous_elem_array, portwatch_ports,
	    portwatch_count);

	/* Drop the space lock */
	is_write_unlock(task->itk_space);

	/*
	 * If no element ended up attached to any port, the refcount fell to
	 * zero and the struct (plus its task/thread refs) must be freed here.
	 */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}

	/* Drop the ref on previous_elem_array (elements displaced from the ports) */
	for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
		task_watchport_elem_deallocate(previous_elem_array[i]);
	}
}
4527 
4528 /*
4529  *	task_remove_turnstile_watchports:
4530  *		Clear all turnstile boost on the task from watchports.
4531  *
4532  *	Arguments:
4533  *		task: task being terminated
4534  *
4535  *	Conditions:
4536  *		Nothing locked.
4537  */
void
task_remove_turnstile_watchports(
	task_t          task)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	struct task_watchports *watchports = NULL;
	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
	uint32_t portwatch_count;

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Check if watchport boost exist */
	if (task->watchports == NULL) {
		is_write_unlock(task->itk_space);
		return;
	}
	watchports = task->watchports;
	portwatch_count = watchports->tw_elem_array_count;

	/* Detach elements from their ports; ports needing a release come back in port_freelist. */
	refs = task_remove_turnstile_watchports_locked(task, watchports,
	    port_freelist);

	is_write_unlock(task->itk_space);

	/* Drop all the port references (outside the space lock) */
	for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
		ip_release(port_freelist[i]);
	}

	/* Clear the task and thread references for task_watchport */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4573 
4574 /*
4575  *	task_transfer_turnstile_watchports:
4576  *		Transfer all watchport turnstile boost from old task to new task.
4577  *
4578  *	Arguments:
4579  *		old_task: task calling exec
4580  *		new_task: new exec'ed task
4581  *		thread: main thread of new task
4582  *
4583  *	Conditions:
4584  *		Nothing locked.
4585  */
void
task_transfer_turnstile_watchports(
	task_t   old_task,
	task_t   new_task,
	thread_t new_thread)
{
	struct task_watchports *old_watchports = NULL;
	struct task_watchports *new_watchports = NULL;
	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
	uint32_t portwatch_count;

	/* Cheap unlocked pre-check; re-validated under the locks below. */
	if (old_task->watchports == NULL || !new_task->active) {
		return;
	}

	/* Get the watch port count from the old task */
	is_write_lock(old_task->itk_space);
	if (old_task->watchports == NULL) {
		is_write_unlock(old_task->itk_space);
		return;
	}

	portwatch_count = old_task->watchports->tw_elem_array_count;
	is_write_unlock(old_task->itk_space);

	/* Allocate outside any lock; comes back with one reference held. */
	new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);

	/* Lock the ipc space for old task (always taken before the new task's) */
	is_write_lock(old_task->itk_space);

	/* Lock the ipc space for new task */
	is_write_lock(new_task->itk_space);

	/* Check if watchport boost exist; state may have changed while unlocked */
	if (old_task->watchports == NULL || !new_task->active) {
		is_write_unlock(new_task->itk_space);
		is_write_unlock(old_task->itk_space);
		(void)task_watchports_release(new_watchports);
		task_watchports_deallocate(new_watchports);
		return;
	}

	old_watchports = old_task->watchports;
	assert(portwatch_count == old_task->watchports->tw_elem_array_count);

	/* Setup new task watchports */
	new_task->watchports = new_watchports;

	for (uint32_t i = 0; i < portwatch_count; i++) {
		ipc_port_t port = old_watchports->tw_elem[i].twe_port;

		if (port == NULL) {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);

		task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);

		/*
		 * Swap the port's element from old to new only if the port still
		 * points at the old element.  The port's own reference carries
		 * over unchanged; only the struct refcounts move across.
		 */
		if (ipc_port_replace_watchport_elem_conditional_locked(port,
		    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&old_watchports->tw_elem[i]);

			task_watchports_retain(new_watchports);
			old_refs = task_watchports_release(old_watchports);

			/* Check if all ports are cleaned */
			if (old_refs == 0) {
				old_task->watchports = NULL;
			}
		} else {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
		}
		/* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
	}

	/* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
	new_refs = task_watchports_release(new_watchports);
	if (new_refs == 0) {
		new_task->watchports = NULL;
	}

	is_write_unlock(new_task->itk_space);
	is_write_unlock(old_task->itk_space);

	/* Clear the task and thread references for old_watchport */
	if (old_refs == 0) {
		task_watchports_deallocate(old_watchports);
	}

	/* Clear the task and thread references for new_watchport */
	if (new_refs == 0) {
		task_watchports_deallocate(new_watchports);
	}
}
4684 
4685 /*
4686  *	task_add_turnstile_watchports_locked:
4687  *		Setup watchports to boost the main thread of the task.
4688  *
4689  *	Arguments:
4690  *		task: task to boost
4691  *		watchports: watchport structure to be attached to the task
4692  *		previous_elem_array: an array of old watchport_elem to be returned to caller
4693  *		portwatch_ports: array of watchports
4694  *		portwatch_count: number of watchports
4695  *
4696  *	Conditions:
4697  *		ipc space of the task locked.
4698  *		returns array of old watchport_elem in previous_elem_array
4699  */
static os_ref_count_t
task_add_turnstile_watchports_locked(
	task_t                      task,
	struct task_watchports      *watchports,
	struct task_watchport_elem  **previous_elem_array,
	ipc_port_t                  *portwatch_ports,
	uint32_t                    portwatch_count)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* Check if the task is still active */
	if (!task->active) {
		refs = task_watchports_release(watchports);
		return refs;
	}

	assert(task->watchports == NULL);
	task->watchports = watchports;

	/* i walks the input ports; j compacts displaced elements for the caller. */
	for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
		ipc_port_t port = portwatch_ports[i];

		task_watchport_elem_init(&watchports->tw_elem[i], task, port);
		if (port == NULL) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}

		ip_mq_lock(port);

		/* Check if port is in valid state to be setup as watchport */
		if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
		    &previous_elem_array[j]) != KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}
		/* port unlocked on return */

		/* Each attached element pins both the port and the watchports struct. */
		ip_reference(port);
		task_watchports_retain(watchports);
		if (previous_elem_array[j] != NULL) {
			j++;
		}
	}

	/* Drop the reference on task_watchport struct returned by os_ref_init */
	refs = task_watchports_release(watchports);
	if (refs == 0) {
		task->watchports = NULL;
	}

	return refs;
}
4753 
4754 /*
4755  *	task_remove_turnstile_watchports_locked:
4756  *		Clear all turnstile boost on the task from watchports.
4757  *
4758  *	Arguments:
4759  *		task: task to remove watchports from
4760  *		watchports: watchports structure for the task
4761  *		port_freelist: array of ports returned with ref to caller
4762  *
4763  *
4764  *	Conditions:
4765  *		ipc space of the task locked.
4766  *		array of ports with refs are returned in port_freelist
4767  */
static os_ref_count_t
task_remove_turnstile_watchports_locked(
	task_t                 task,
	struct task_watchports *watchports,
	ipc_port_t             *port_freelist)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* i walks the element array; j compacts the detached ports for the caller. */
	for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
		ipc_port_t port = watchports->tw_elem[i].twe_port;
		if (port == NULL) {
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);
		if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
		    &watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			/* Hand the port back with the element's reference still held. */
			port_freelist[j++] = port;
			refs = task_watchports_release(watchports);

			/* Check if all ports are cleaned */
			if (refs == 0) {
				task->watchports = NULL;
				break;
			}
		}
		/* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
	}
	return refs;
}
4800 
4801 /*
4802  *	task_watchports_alloc_init:
4803  *		Allocate and initialize task watchport struct.
4804  *
4805  *	Conditions:
4806  *		Nothing locked.
4807  */
4808 static struct task_watchports *
task_watchports_alloc_init(task_t task,thread_t thread,uint32_t count)4809 task_watchports_alloc_init(
4810 	task_t        task,
4811 	thread_t      thread,
4812 	uint32_t      count)
4813 {
4814 	struct task_watchports *watchports = kalloc_type(struct task_watchports,
4815 	    struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4816 
4817 	task_reference(task);
4818 	thread_reference(thread);
4819 	watchports->tw_task = task;
4820 	watchports->tw_thread = thread;
4821 	watchports->tw_elem_array_count = count;
4822 	os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4823 
4824 	return watchports;
4825 }
4826 
4827 /*
4828  *	task_watchports_deallocate:
4829  *		Deallocate task watchport struct.
4830  *
4831  *	Conditions:
4832  *		Nothing locked.
4833  */
4834 static void
task_watchports_deallocate(struct task_watchports * watchports)4835 task_watchports_deallocate(
4836 	struct task_watchports *watchports)
4837 {
4838 	uint32_t portwatch_count = watchports->tw_elem_array_count;
4839 
4840 	task_deallocate(watchports->tw_task);
4841 	thread_deallocate(watchports->tw_thread);
4842 	kfree_type(struct task_watchports, struct task_watchport_elem,
4843 	    portwatch_count, watchports);
4844 }
4845 
4846 /*
4847  *	task_watchport_elem_deallocate:
4848  *		Deallocate task watchport element and release its ref on task_watchport.
4849  *
4850  *	Conditions:
4851  *		Nothing locked.
4852  */
void
task_watchport_elem_deallocate(
	struct task_watchport_elem *watchport_elem)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	task_t task = watchport_elem->twe_task;
	struct task_watchports *watchports = NULL;
	ipc_port_t port = NULL;

	assert(task != NULL);

	/* Take the space lock to modify the element */
	is_write_lock(task->itk_space);

	watchports = task->watchports;
	assert(watchports != NULL);

	port = watchport_elem->twe_port;
	assert(port != NULL);

	task_watchport_elem_clear(watchport_elem);
	refs = task_watchports_release(watchports);

	if (refs == 0) {
		task->watchports = NULL;
	}

	is_write_unlock(task->itk_space);

	/* Drop the port reference the element held; free the struct on last ref. */
	ip_release(port);
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4887 
4888 /*
4889  *	task_has_watchports:
4890  *		Return TRUE if task has watchport boosts.
4891  *
4892  *	Conditions:
4893  *		Nothing locked.
4894  */
4895 boolean_t
task_has_watchports(task_t task)4896 task_has_watchports(task_t task)
4897 {
4898 	return task->watchports != NULL;
4899 }
4900 
4901 #if DEVELOPMENT || DEBUG
4902 
4903 extern void IOSleep(int);
4904 
kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int     n;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * this function is used to strip all of the mappings from
	 * the pmap for the specified task to force the task to
	 * re-fault all of the pages it is actively using... this
	 * allows us to approximate the true working set of the
	 * specified task.  We only engage if at least 1 of the
	 * threads in the task is runnable, but we want to continuously
	 * sweep (at least for a while - I've arbitrarily set the limit at
	 * 100 sweeps to be re-looked at as we gain experience) to get a better
	 * view into what areas within a page are being visited (as opposed to only
	 * seeing the first fault of a page after the task becomes
	 * runnable)...  in the future I may
	 * try to block until awakened by a thread in this task
	 * being made runnable, but for now we'll periodically poll from the
	 * user level debug tool driving the sysctl
	 */
	for (n = 0; n < 100; n++) {
		thread_t        thread;
		boolean_t       runnable;
		boolean_t       do_unnest;
		int             page_count;

		runnable = FALSE;
		do_unnest = FALSE;

		task_lock(task);

		/* Any one runnable thread is enough to keep sweeping. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}
		/* Count one disconnect session per call, on the first sweep only. */
		if (n == 0) {
			task->task_disconnected_count++;
		}

		/* Unnest shared regions at most once per task (sticky flag). */
		if (task->task_unnested == FALSE) {
			if (runnable == TRUE) {
				task->task_unnested = TRUE;
				do_unnest = TRUE;
			}
		}
		task_unlock(task);

		/* Stop sweeping once no thread in the task is runnable. */
		if (runnable == FALSE) {
			break;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
		    task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
		    task, page_count, 0, 0, 0);

		/* Yield briefly every 5th sweep so we don't monopolize the CPU. */
		if ((n % 5) == 4) {
			IOSleep(1);
		}
	}
	return KERN_SUCCESS;
}
4977 
4978 #endif
4979 
4980 
4981 #if CONFIG_FREEZE
4982 
4983 /*
4984  *	task_freeze:
4985  *
4986  *	Freeze a task.
4987  *
4988  * Conditions:
4989  *      The caller holds a reference to the task
4990  */
4991 extern void     vm_wake_compactor_swapper(void);
4992 extern struct freezer_context freezer_context_global;
4993 
kern_return_t
task_freeze(
	task_t    task,
	uint32_t           *purgeable_count,
	uint32_t           *wired_count,
	uint32_t           *clean_count,
	uint32_t           *dirty_count,
	uint32_t           dirty_budget,
	uint32_t           *shared_count,
	int                *freezer_error_code,
	boolean_t          eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Wait for any in-flight freeze/thaw transition to finish, then
	 * claim the transition so only one freezer operates on this task.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	/* An already-frozen task cannot be frozen again. */
	if (task->frozen) {
		task_unlock(task);
		return KERN_FAILURE;
	}
	task->changing_freeze_state = TRUE;

	/* Publish which task the global freezer context is operating on. */
	freezer_context_global.freezer_ctx_task = task;

	/* Drop the task lock for the long-running VM work below. */
	task_unlock(task);

	kr = vm_map_freeze(task,
	    purgeable_count,
	    wired_count,
	    clean_count,
	    dirty_count,
	    dirty_budget,
	    shared_count,
	    freezer_error_code,
	    eval_only);

	task_lock(task);

	/* Only a real (non-evaluation) successful freeze marks the task frozen. */
	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;

		freezer_context_global.freezer_ctx_task = NULL;
		freezer_context_global.freezer_ctx_uncompressed_pages = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64);         /*used to track pageouts*/
		}

		freezer_context_global.freezer_ctx_swapped_bytes = 0;
	}

	/* Release the transition claim and wake anyone waiting on it. */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (kr == KERN_SUCCESS) &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kind of "per-task packed c_segs"
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&vm_swapout_thread);
	}

	return kr;
}
5082 
5083 /*
5084  *	task_thaw:
5085  *
5086  *	Thaw a currently frozen task.
5087  *
5088  * Conditions:
5089  *      The caller holds a reference to the task
5090  */
kern_return_t
task_thaw(
	task_t          task)
{
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Wait for any in-flight freeze/thaw transition to finish before
	 * inspecting the frozen flag.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	/* Thawing a task that is not frozen is an error. */
	if (!task->frozen) {
		task_unlock(task);
		return KERN_FAILURE;
	}
	task->frozen = FALSE;

	task_unlock(task);

	return KERN_SUCCESS;
}
5118 
5119 void
task_update_frozen_to_swap_acct(task_t task,int64_t amount,freezer_acct_op_t op)5120 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
5121 {
5122 	/*
5123 	 * We don't assert that the task lock is held because we call this
5124 	 * routine from the decompression path and we won't be holding the
5125 	 * task lock. However, since we are in the context of the task we are
5126 	 * safe.
5127 	 * In the case of the task_freeze path, we call it from behind the task
5128 	 * lock but we don't need to because we have a reference on the proc
5129 	 * being frozen.
5130 	 */
5131 
5132 	assert(task);
5133 	if (amount == 0) {
5134 		return;
5135 	}
5136 
5137 	if (op == CREDIT_TO_SWAP) {
5138 		ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5139 	} else if (op == DEBIT_FROM_SWAP) {
5140 		ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5141 	} else {
5142 		panic("task_update_frozen_to_swap_acct: Invalid ledger op");
5143 	}
5144 }
5145 #endif /* CONFIG_FREEZE */
5146 
kern_return_t
task_set_security_tokens(
	task_t           task,
	security_token_t sec_token,
	audit_token_t    audit_token,
	host_priv_t      host_priv)
{
	ipc_port_t       host_port = IP_NULL;
	kern_return_t    kr;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Install the new security and audit tokens under the task lock. */
	task_lock(task);
	task_set_tokens(task, &sec_token, &audit_token);
	task_unlock(task);

	/*
	 * Privileged callers get a host-priv port; everyone else gets the
	 * regular host port for the task's TASK_HOST_PORT special port.
	 */
	if (host_priv != HOST_PRIV_NULL) {
		kr = host_get_host_priv_port(host_priv, &host_port);
	} else {
		kr = host_get_host_port(host_priv_self(), &host_port);
	}
	assert(kr == KERN_SUCCESS);

	kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
	return kr;
}
5175 
kern_return_t
task_send_trace_memory(
	__unused task_t   target_task,
	__unused uint32_t pid,
	__unused uint64_t uniqueid)
{
	/* Stub: this interface always rejects the request. */
	return KERN_INVALID_ARGUMENT;
}
5184 
5185 /*
5186  * This routine was added, pretty much exclusively, for registering the
5187  * RPC glue vector for in-kernel short circuited tasks.  Rather than
5188  * removing it completely, I have only disabled that feature (which was
5189  * the only feature at the time).  It just appears that we are going to
5190  * want to add some user data to tasks in the future (i.e. bsd info,
5191  * task names, etc...), so I left it in the formal task interface.
5192  */
5193 kern_return_t
task_set_info(task_t task,task_flavor_t flavor,__unused task_info_t task_info_in,__unused mach_msg_type_number_t task_info_count)5194 task_set_info(
5195 	task_t          task,
5196 	task_flavor_t   flavor,
5197 	__unused task_info_t    task_info_in,           /* pointer to IN array */
5198 	__unused mach_msg_type_number_t task_info_count)
5199 {
5200 	if (task == TASK_NULL) {
5201 		return KERN_INVALID_ARGUMENT;
5202 	}
5203 	switch (flavor) {
5204 #if CONFIG_ATM
5205 	case TASK_TRACE_MEMORY_INFO:
5206 		return KERN_NOT_SUPPORTED;
5207 #endif // CONFIG_ATM
5208 	default:
5209 		return KERN_INVALID_ARGUMENT;
5210 	}
5211 }
5212 
5213 static void
_task_fill_times(task_t task,time_value_t * user_time,time_value_t * sys_time)5214 _task_fill_times(task_t task, time_value_t *user_time, time_value_t *sys_time)
5215 {
5216 	clock_sec_t sec;
5217 	clock_usec_t usec;
5218 
5219 	struct recount_times_mach times = recount_task_terminated_times(task);
5220 	absolutetime_to_microtime(times.rtm_user, &sec, &usec);
5221 	user_time->seconds = (typeof(user_time->seconds))sec;
5222 	user_time->microseconds = usec;
5223 	absolutetime_to_microtime(times.rtm_system, &sec, &usec);
5224 	sys_time->seconds = (typeof(sys_time->seconds))sec;
5225 	sys_time->microseconds = usec;
5226 }
5227 
/* NOTE(review): presumably gates a workaround tracked by rdar://20146450; its consumer is in task_info below — confirm. */
int radar_20146450 = 1;
5229 kern_return_t
task_info(task_t task,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)5230 task_info(
5231 	task_t                  task,
5232 	task_flavor_t           flavor,
5233 	task_info_t             task_info_out,
5234 	mach_msg_type_number_t  *task_info_count)
5235 {
5236 	kern_return_t error = KERN_SUCCESS;
5237 	mach_msg_type_number_t  original_task_info_count;
5238 	bool is_kernel_task = (task == kernel_task);
5239 
5240 	if (task == TASK_NULL) {
5241 		return KERN_INVALID_ARGUMENT;
5242 	}
5243 
5244 	original_task_info_count = *task_info_count;
5245 	task_lock(task);
5246 
5247 	if (task != current_task() && !task->active) {
5248 		task_unlock(task);
5249 		return KERN_INVALID_ARGUMENT;
5250 	}
5251 
5252 
5253 	switch (flavor) {
5254 	case TASK_BASIC_INFO_32:
5255 	case TASK_BASIC2_INFO_32:
5256 #if defined(__arm64__)
5257 	case TASK_BASIC_INFO_64:
5258 #endif
5259 		{
5260 			task_basic_info_32_t basic_info;
5261 			ledger_amount_t      tmp;
5262 
5263 			if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
5264 				error = KERN_INVALID_ARGUMENT;
5265 				break;
5266 			}
5267 
5268 			basic_info = (task_basic_info_32_t)task_info_out;
5269 
5270 			basic_info->virtual_size = (typeof(basic_info->virtual_size))
5271 			    vm_map_adjusted_size(is_kernel_task ? kernel_map : task->map);
5272 			if (flavor == TASK_BASIC2_INFO_32) {
5273 				/*
5274 				 * The "BASIC2" flavor gets the maximum resident
5275 				 * size instead of the current resident size...
5276 				 */
5277 				ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
5278 			} else {
5279 				ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
5280 			}
5281 			basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
5282 
5283 			_task_fill_times(task, &basic_info->user_time,
5284 			    &basic_info->system_time);
5285 
5286 			basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5287 			basic_info->suspend_count = task->user_stop_count;
5288 
5289 			*task_info_count = TASK_BASIC_INFO_32_COUNT;
5290 			break;
5291 		}
5292 
5293 #if defined(__arm64__)
5294 	case TASK_BASIC_INFO_64_2:
5295 	{
5296 		task_basic_info_64_2_t  basic_info;
5297 
5298 		if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
5299 			error = KERN_INVALID_ARGUMENT;
5300 			break;
5301 		}
5302 
5303 		basic_info = (task_basic_info_64_2_t)task_info_out;
5304 
5305 		basic_info->virtual_size  = vm_map_adjusted_size(is_kernel_task ?
5306 		    kernel_map : task->map);
5307 		ledger_get_balance(task->ledger, task_ledgers.phys_mem,
5308 		    (ledger_amount_t *)&basic_info->resident_size);
5309 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5310 		basic_info->suspend_count = task->user_stop_count;
5311 		_task_fill_times(task, &basic_info->user_time,
5312 		    &basic_info->system_time);
5313 
5314 		*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
5315 		break;
5316 	}
5317 
5318 #else /* defined(__arm64__) */
5319 	case TASK_BASIC_INFO_64:
5320 	{
5321 		task_basic_info_64_t basic_info;
5322 
5323 		if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
5324 			error = KERN_INVALID_ARGUMENT;
5325 			break;
5326 		}
5327 
5328 		basic_info = (task_basic_info_64_t)task_info_out;
5329 
5330 		basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5331 		    kernel_map : task->map);
5332 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
5333 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5334 		basic_info->suspend_count = task->user_stop_count;
5335 		_task_fill_times(task, &basic_info->user_time,
5336 		    &basic_info->system_time);
5337 
5338 		*task_info_count = TASK_BASIC_INFO_64_COUNT;
5339 		break;
5340 	}
5341 #endif /* defined(__arm64__) */
5342 
5343 	case MACH_TASK_BASIC_INFO:
5344 	{
5345 		mach_task_basic_info_t  basic_info;
5346 
5347 		if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5348 			error = KERN_INVALID_ARGUMENT;
5349 			break;
5350 		}
5351 
5352 		basic_info = (mach_task_basic_info_t)task_info_out;
5353 
5354 		basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5355 		    kernel_map : task->map);
5356 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
5357 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
5358 		basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5359 		basic_info->suspend_count = task->user_stop_count;
5360 		_task_fill_times(task, &basic_info->user_time,
5361 		    &basic_info->system_time);
5362 
5363 		*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5364 		break;
5365 	}
5366 
5367 	case TASK_THREAD_TIMES_INFO:
5368 	{
5369 		task_thread_times_info_t times_info;
5370 		thread_t                 thread;
5371 
5372 		if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5373 			error = KERN_INVALID_ARGUMENT;
5374 			break;
5375 		}
5376 
5377 		times_info = (task_thread_times_info_t)task_info_out;
5378 		times_info->user_time = (time_value_t){ 0 };
5379 		times_info->system_time = (time_value_t){ 0 };
5380 
5381 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5382 			if ((thread->options & TH_OPT_IDLE_THREAD) == 0) {
5383 				time_value_t user_time, system_time;
5384 
5385 				thread_read_times(thread, &user_time, &system_time, NULL);
5386 				time_value_add(&times_info->user_time, &user_time);
5387 				time_value_add(&times_info->system_time, &system_time);
5388 			}
5389 		}
5390 
5391 		*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5392 		break;
5393 	}
5394 
5395 	case TASK_ABSOLUTETIME_INFO:
5396 	{
5397 		task_absolutetime_info_t        info;
5398 
5399 		if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5400 			error = KERN_INVALID_ARGUMENT;
5401 			break;
5402 		}
5403 
5404 		info = (task_absolutetime_info_t)task_info_out;
5405 
5406 		struct recount_times_mach term_times =
5407 		    recount_task_terminated_times(task);
5408 		struct recount_times_mach total_times = recount_task_times(task);
5409 
5410 		info->total_user = total_times.rtm_user;
5411 		info->total_system = total_times.rtm_system;
5412 		info->threads_user = total_times.rtm_user - term_times.rtm_user;
5413 		info->threads_system += total_times.rtm_system - term_times.rtm_system;
5414 
5415 		*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5416 		break;
5417 	}
5418 
5419 	case TASK_DYLD_INFO:
5420 	{
5421 		task_dyld_info_t info;
5422 
5423 		/*
5424 		 * We added the format field to TASK_DYLD_INFO output.  For
5425 		 * temporary backward compatibility, accept the fact that
5426 		 * clients may ask for the old version - distinquished by the
5427 		 * size of the expected result structure.
5428 		 */
5429 #define TASK_LEGACY_DYLD_INFO_COUNT \
5430 	        offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5431 
5432 		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5433 			error = KERN_INVALID_ARGUMENT;
5434 			break;
5435 		}
5436 
5437 		info = (task_dyld_info_t)task_info_out;
5438 		info->all_image_info_addr = task->all_image_info_addr;
5439 		info->all_image_info_size = task->all_image_info_size;
5440 
5441 		/* only set format on output for those expecting it */
5442 		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5443 			info->all_image_info_format = task_has_64Bit_addr(task) ?
5444 			    TASK_DYLD_ALL_IMAGE_INFO_64 :
5445 			    TASK_DYLD_ALL_IMAGE_INFO_32;
5446 			*task_info_count = TASK_DYLD_INFO_COUNT;
5447 		} else {
5448 			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5449 		}
5450 		break;
5451 	}
5452 
5453 	case TASK_EXTMOD_INFO:
5454 	{
5455 		task_extmod_info_t info;
5456 		void *p;
5457 
5458 		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5459 			error = KERN_INVALID_ARGUMENT;
5460 			break;
5461 		}
5462 
5463 		info = (task_extmod_info_t)task_info_out;
5464 
5465 		p = get_bsdtask_info(task);
5466 		if (p) {
5467 			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5468 		} else {
5469 			bzero(info->task_uuid, sizeof(info->task_uuid));
5470 		}
5471 		info->extmod_statistics = task->extmod_statistics;
5472 		*task_info_count = TASK_EXTMOD_INFO_COUNT;
5473 
5474 		break;
5475 	}
5476 
5477 	case TASK_KERNELMEMORY_INFO:
5478 	{
5479 		task_kernelmemory_info_t        tkm_info;
5480 		ledger_amount_t                 credit, debit;
5481 
5482 		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5483 			error = KERN_INVALID_ARGUMENT;
5484 			break;
5485 		}
5486 
5487 		tkm_info = (task_kernelmemory_info_t) task_info_out;
5488 		tkm_info->total_palloc = 0;
5489 		tkm_info->total_pfree = 0;
5490 		tkm_info->total_salloc = 0;
5491 		tkm_info->total_sfree = 0;
5492 
5493 		if (task == kernel_task) {
5494 			/*
5495 			 * All shared allocs/frees from other tasks count against
5496 			 * the kernel private memory usage.  If we are looking up
5497 			 * info for the kernel task, gather from everywhere.
5498 			 */
5499 			task_unlock(task);
5500 
5501 			/* start by accounting for all the terminated tasks against the kernel */
5502 			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5503 			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5504 
5505 			/* count all other task/thread shared alloc/free against the kernel */
5506 			lck_mtx_lock(&tasks_threads_lock);
5507 
5508 			/* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5509 			queue_iterate(&tasks, task, task_t, tasks) {
5510 				if (task == kernel_task) {
5511 					if (ledger_get_entries(task->ledger,
5512 					    task_ledgers.tkm_private, &credit,
5513 					    &debit) == KERN_SUCCESS) {
5514 						tkm_info->total_palloc += credit;
5515 						tkm_info->total_pfree += debit;
5516 					}
5517 				}
5518 				if (!ledger_get_entries(task->ledger,
5519 				    task_ledgers.tkm_shared, &credit, &debit)) {
5520 					tkm_info->total_palloc += credit;
5521 					tkm_info->total_pfree += debit;
5522 				}
5523 			}
5524 			lck_mtx_unlock(&tasks_threads_lock);
5525 		} else {
5526 			if (!ledger_get_entries(task->ledger,
5527 			    task_ledgers.tkm_private, &credit, &debit)) {
5528 				tkm_info->total_palloc = credit;
5529 				tkm_info->total_pfree = debit;
5530 			}
5531 			if (!ledger_get_entries(task->ledger,
5532 			    task_ledgers.tkm_shared, &credit, &debit)) {
5533 				tkm_info->total_salloc = credit;
5534 				tkm_info->total_sfree = debit;
5535 			}
5536 			task_unlock(task);
5537 		}
5538 
5539 		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5540 		return KERN_SUCCESS;
5541 	}
5542 
5543 	/* OBSOLETE */
5544 	case TASK_SCHED_FIFO_INFO:
5545 	{
5546 		if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5547 			error = KERN_INVALID_ARGUMENT;
5548 			break;
5549 		}
5550 
5551 		error = KERN_INVALID_POLICY;
5552 		break;
5553 	}
5554 
5555 	/* OBSOLETE */
5556 	case TASK_SCHED_RR_INFO:
5557 	{
5558 		policy_rr_base_t        rr_base;
5559 		uint32_t quantum_time;
5560 		uint64_t quantum_ns;
5561 
5562 		if (*task_info_count < POLICY_RR_BASE_COUNT) {
5563 			error = KERN_INVALID_ARGUMENT;
5564 			break;
5565 		}
5566 
5567 		rr_base = (policy_rr_base_t) task_info_out;
5568 
5569 		if (task != kernel_task) {
5570 			error = KERN_INVALID_POLICY;
5571 			break;
5572 		}
5573 
5574 		rr_base->base_priority = task->priority;
5575 
5576 		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5577 		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5578 
5579 		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5580 
5581 		*task_info_count = POLICY_RR_BASE_COUNT;
5582 		break;
5583 	}
5584 
5585 	/* OBSOLETE */
5586 	case TASK_SCHED_TIMESHARE_INFO:
5587 	{
5588 		policy_timeshare_base_t ts_base;
5589 
5590 		if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5591 			error = KERN_INVALID_ARGUMENT;
5592 			break;
5593 		}
5594 
5595 		ts_base = (policy_timeshare_base_t) task_info_out;
5596 
5597 		if (task == kernel_task) {
5598 			error = KERN_INVALID_POLICY;
5599 			break;
5600 		}
5601 
5602 		ts_base->base_priority = task->priority;
5603 
5604 		*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5605 		break;
5606 	}
5607 
5608 	case TASK_SECURITY_TOKEN:
5609 	{
5610 		security_token_t        *sec_token_p;
5611 
5612 		if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5613 			error = KERN_INVALID_ARGUMENT;
5614 			break;
5615 		}
5616 
5617 		sec_token_p = (security_token_t *) task_info_out;
5618 
5619 		*sec_token_p = *task_get_sec_token(task);
5620 
5621 		*task_info_count = TASK_SECURITY_TOKEN_COUNT;
5622 		break;
5623 	}
5624 
5625 	case TASK_AUDIT_TOKEN:
5626 	{
5627 		audit_token_t   *audit_token_p;
5628 
5629 		if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5630 			error = KERN_INVALID_ARGUMENT;
5631 			break;
5632 		}
5633 
5634 		audit_token_p = (audit_token_t *) task_info_out;
5635 
5636 		*audit_token_p = *task_get_audit_token(task);
5637 
5638 		*task_info_count = TASK_AUDIT_TOKEN_COUNT;
5639 		break;
5640 	}
5641 
5642 	case TASK_SCHED_INFO:
5643 		error = KERN_INVALID_ARGUMENT;
5644 		break;
5645 
5646 	case TASK_EVENTS_INFO:
5647 	{
5648 		task_events_info_t      events_info;
5649 		thread_t                thread;
5650 		uint64_t                n_syscalls_mach, n_syscalls_unix, n_csw;
5651 
5652 		if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5653 			error = KERN_INVALID_ARGUMENT;
5654 			break;
5655 		}
5656 
5657 		events_info = (task_events_info_t) task_info_out;
5658 
5659 
5660 		events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5661 		events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5662 		events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5663 		events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5664 		events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5665 
5666 		n_syscalls_mach = task->syscalls_mach;
5667 		n_syscalls_unix = task->syscalls_unix;
5668 		n_csw = task->c_switch;
5669 
5670 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5671 			n_csw           += thread->c_switch;
5672 			n_syscalls_mach += thread->syscalls_mach;
5673 			n_syscalls_unix += thread->syscalls_unix;
5674 		}
5675 
5676 		events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5677 		events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5678 		events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5679 
5680 		*task_info_count = TASK_EVENTS_INFO_COUNT;
5681 		break;
5682 	}
5683 	case TASK_AFFINITY_TAG_INFO:
5684 	{
5685 		if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5686 			error = KERN_INVALID_ARGUMENT;
5687 			break;
5688 		}
5689 
5690 		error = task_affinity_info(task, task_info_out, task_info_count);
5691 		break;
5692 	}
5693 	case TASK_POWER_INFO:
5694 	{
5695 		if (*task_info_count < TASK_POWER_INFO_COUNT) {
5696 			error = KERN_INVALID_ARGUMENT;
5697 			break;
5698 		}
5699 
5700 		task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5701 		break;
5702 	}
5703 
5704 	case TASK_POWER_INFO_V2:
5705 	{
5706 		if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5707 			error = KERN_INVALID_ARGUMENT;
5708 			break;
5709 		}
5710 		task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5711 		task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5712 		break;
5713 	}
5714 
5715 	case TASK_VM_INFO:
5716 	case TASK_VM_INFO_PURGEABLE:
5717 	{
5718 		task_vm_info_t          vm_info;
5719 		vm_map_t                map;
5720 		ledger_amount_t         tmp_amount;
5721 
5722 		struct proc *p;
5723 		uint32_t platform, sdk;
5724 		p = current_proc();
5725 		platform = proc_platform(p);
5726 		sdk = proc_sdk(p);
5727 		if (original_task_info_count > TASK_VM_INFO_COUNT) {
5728 			/*
5729 			 * Some iOS apps pass an incorrect value for
5730 			 * task_info_count, expressed in number of bytes
5731 			 * instead of number of "natural_t" elements, which
5732 			 * can lead to binary compatibility issues (including
5733 			 * stack corruption) when the data structure is
5734 			 * expanded in the future.
5735 			 * Let's make this potential issue visible by
5736 			 * logging about it...
5737 			 */
5738 			printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5739 			    "task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5740 			    "%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5741 			    __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5742 			    flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5743 			    platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5744 			DTRACE_VM4(suspicious_task_vm_info_count,
5745 			    mach_msg_type_number_t, original_task_info_count,
5746 			    mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5747 			    uint32_t, platform,
5748 			    uint32_t, sdk);
5749 		}
5750 #if __arm64__
5751 		if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5752 		    platform == PLATFORM_IOS &&
5753 		    sdk != 0 &&
5754 		    (sdk >> 16) <= 12) {
5755 			/*
5756 			 * Some iOS apps pass an incorrect value for
5757 			 * task_info_count, expressed in number of bytes
5758 			 * instead of number of "natural_t" elements.
5759 			 * For the sake of backwards binary compatibility
5760 			 * for apps built with an iOS12 or older SDK and using
5761 			 * the "rev2" data structure, let's fix task_info_count
5762 			 * for them, to avoid stomping past the actual end
5763 			 * of their buffer.
5764 			 */
5765 #if DEVELOPMENT || DEBUG
5766 			printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5767 			    "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5768 			    proc_name_address(p), original_task_info_count,
5769 			    TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16),
5770 			    ((sdk >> 8) & 0xff), (sdk & 0xff));
5771 #endif /* DEVELOPMENT || DEBUG */
5772 			DTRACE_VM4(workaround_task_vm_info_count,
5773 			    mach_msg_type_number_t, original_task_info_count,
5774 			    mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5775 			    uint32_t, platform,
5776 			    uint32_t, sdk);
5777 			original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5778 			*task_info_count = original_task_info_count;
5779 		}
5780 		if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5781 		    platform == PLATFORM_IOS &&
5782 		    sdk != 0 &&
5783 		    (sdk >> 16) <= 15) {
5784 			/*
5785 			 * Some iOS apps pass an incorrect value for
5786 			 * task_info_count, expressed in number of bytes
5787 			 * instead of number of "natural_t" elements.
5788 			 */
5789 			printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5790 			    "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5791 			    proc_name_address(p), original_task_info_count,
5792 			    TASK_VM_INFO_REV5_COUNT, platform, (sdk >> 16),
5793 			    ((sdk >> 8) & 0xff), (sdk & 0xff));
5794 			DTRACE_VM4(workaround_task_vm_info_count,
5795 			    mach_msg_type_number_t, original_task_info_count,
5796 			    mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5797 			    uint32_t, platform,
5798 			    uint32_t, sdk);
5799 #if DEVELOPMENT || DEBUG
5800 			/*
5801 			 * For the sake of internal builds livability,
5802 			 * work around this user-space bug by capping the
5803 			 * buffer's size to what it was with the iOS15 SDK.
5804 			 */
5805 			original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5806 			*task_info_count = original_task_info_count;
5807 #endif /* DEVELOPMENT || DEBUG */
5808 		}
5809 #endif /* __arm64__ */
5810 
5811 		if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5812 			error = KERN_INVALID_ARGUMENT;
5813 			break;
5814 		}
5815 
5816 		vm_info = (task_vm_info_t)task_info_out;
5817 
5818 		/*
5819 		 * Do not hold both the task and map locks,
5820 		 * so convert the task lock into a map reference,
5821 		 * drop the task lock, then lock the map.
5822 		 */
5823 		if (is_kernel_task) {
5824 			map = kernel_map;
5825 			task_unlock(task);
5826 			/* no lock, no reference */
5827 		} else {
5828 			map = task->map;
5829 			vm_map_reference(map);
5830 			task_unlock(task);
5831 			vm_map_lock_read(map);
5832 		}
5833 
5834 		vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5835 		vm_info->region_count = map->hdr.nentries;
5836 		vm_info->page_size = vm_map_page_size(map);
5837 
5838 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5839 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5840 
5841 		vm_info->device = 0;
5842 		vm_info->device_peak = 0;
5843 		ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5844 		ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5845 		ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5846 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5847 		ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5848 		ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5849 		ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5850 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5851 		ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5852 
5853 		vm_info->purgeable_volatile_pmap = 0;
5854 		vm_info->purgeable_volatile_resident = 0;
5855 		vm_info->purgeable_volatile_virtual = 0;
5856 		if (is_kernel_task) {
5857 			/*
5858 			 * We do not maintain the detailed stats for the
5859 			 * kernel_pmap, so just count everything as
5860 			 * "internal"...
5861 			 */
5862 			vm_info->internal = vm_info->resident_size;
5863 			/*
5864 			 * ... but since the memory held by the VM compressor
5865 			 * in the kernel address space ought to be attributed
5866 			 * to user-space tasks, we subtract it from "internal"
5867 			 * to give memory reporting tools a more accurate idea
5868 			 * of what the kernel itself is actually using, instead
5869 			 * of making it look like the kernel is leaking memory
5870 			 * when the system is under memory pressure.
5871 			 */
5872 			vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5873 			    PAGE_SIZE);
5874 		} else {
5875 			mach_vm_size_t  volatile_virtual_size;
5876 			mach_vm_size_t  volatile_resident_size;
5877 			mach_vm_size_t  volatile_compressed_size;
5878 			mach_vm_size_t  volatile_pmap_size;
5879 			mach_vm_size_t  volatile_compressed_pmap_size;
5880 			kern_return_t   kr;
5881 
5882 			if (flavor == TASK_VM_INFO_PURGEABLE) {
5883 				kr = vm_map_query_volatile(
5884 					map,
5885 					&volatile_virtual_size,
5886 					&volatile_resident_size,
5887 					&volatile_compressed_size,
5888 					&volatile_pmap_size,
5889 					&volatile_compressed_pmap_size);
5890 				if (kr == KERN_SUCCESS) {
5891 					vm_info->purgeable_volatile_pmap =
5892 					    volatile_pmap_size;
5893 					if (radar_20146450) {
5894 						vm_info->compressed -=
5895 						    volatile_compressed_pmap_size;
5896 					}
5897 					vm_info->purgeable_volatile_resident =
5898 					    volatile_resident_size;
5899 					vm_info->purgeable_volatile_virtual =
5900 					    volatile_virtual_size;
5901 				}
5902 			}
5903 		}
5904 		*task_info_count = TASK_VM_INFO_REV0_COUNT;
5905 
5906 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5907 			/* must be captured while we still have the map lock */
5908 			vm_info->min_address = map->min_offset;
5909 			vm_info->max_address = map->max_offset;
5910 		}
5911 
5912 		/*
5913 		 * Done with vm map things, can drop the map lock and reference,
5914 		 * and take the task lock back.
5915 		 *
5916 		 * Re-validate that the task didn't die on us.
5917 		 */
5918 		if (!is_kernel_task) {
5919 			vm_map_unlock_read(map);
5920 			vm_map_deallocate(map);
5921 		}
5922 		map = VM_MAP_NULL;
5923 
5924 		task_lock(task);
5925 
5926 		if ((task != current_task()) && (!task->active)) {
5927 			error = KERN_INVALID_ARGUMENT;
5928 			break;
5929 		}
5930 
5931 		if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5932 			vm_info->phys_footprint =
5933 			    (mach_vm_size_t) get_task_phys_footprint(task);
5934 			*task_info_count = TASK_VM_INFO_REV1_COUNT;
5935 		}
5936 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5937 			/* data was captured above */
5938 			*task_info_count = TASK_VM_INFO_REV2_COUNT;
5939 		}
5940 
5941 		if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5942 			ledger_get_lifetime_max(task->ledger,
5943 			    task_ledgers.phys_footprint,
5944 			    &vm_info->ledger_phys_footprint_peak);
5945 			ledger_get_balance(task->ledger,
5946 			    task_ledgers.purgeable_nonvolatile,
5947 			    &vm_info->ledger_purgeable_nonvolatile);
5948 			ledger_get_balance(task->ledger,
5949 			    task_ledgers.purgeable_nonvolatile_compressed,
5950 			    &vm_info->ledger_purgeable_novolatile_compressed);
5951 			ledger_get_balance(task->ledger,
5952 			    task_ledgers.purgeable_volatile,
5953 			    &vm_info->ledger_purgeable_volatile);
5954 			ledger_get_balance(task->ledger,
5955 			    task_ledgers.purgeable_volatile_compressed,
5956 			    &vm_info->ledger_purgeable_volatile_compressed);
5957 			ledger_get_balance(task->ledger,
5958 			    task_ledgers.network_nonvolatile,
5959 			    &vm_info->ledger_tag_network_nonvolatile);
5960 			ledger_get_balance(task->ledger,
5961 			    task_ledgers.network_nonvolatile_compressed,
5962 			    &vm_info->ledger_tag_network_nonvolatile_compressed);
5963 			ledger_get_balance(task->ledger,
5964 			    task_ledgers.network_volatile,
5965 			    &vm_info->ledger_tag_network_volatile);
5966 			ledger_get_balance(task->ledger,
5967 			    task_ledgers.network_volatile_compressed,
5968 			    &vm_info->ledger_tag_network_volatile_compressed);
5969 			ledger_get_balance(task->ledger,
5970 			    task_ledgers.media_footprint,
5971 			    &vm_info->ledger_tag_media_footprint);
5972 			ledger_get_balance(task->ledger,
5973 			    task_ledgers.media_footprint_compressed,
5974 			    &vm_info->ledger_tag_media_footprint_compressed);
5975 			ledger_get_balance(task->ledger,
5976 			    task_ledgers.media_nofootprint,
5977 			    &vm_info->ledger_tag_media_nofootprint);
5978 			ledger_get_balance(task->ledger,
5979 			    task_ledgers.media_nofootprint_compressed,
5980 			    &vm_info->ledger_tag_media_nofootprint_compressed);
5981 			ledger_get_balance(task->ledger,
5982 			    task_ledgers.graphics_footprint,
5983 			    &vm_info->ledger_tag_graphics_footprint);
5984 			ledger_get_balance(task->ledger,
5985 			    task_ledgers.graphics_footprint_compressed,
5986 			    &vm_info->ledger_tag_graphics_footprint_compressed);
5987 			ledger_get_balance(task->ledger,
5988 			    task_ledgers.graphics_nofootprint,
5989 			    &vm_info->ledger_tag_graphics_nofootprint);
5990 			ledger_get_balance(task->ledger,
5991 			    task_ledgers.graphics_nofootprint_compressed,
5992 			    &vm_info->ledger_tag_graphics_nofootprint_compressed);
5993 			ledger_get_balance(task->ledger,
5994 			    task_ledgers.neural_footprint,
5995 			    &vm_info->ledger_tag_neural_footprint);
5996 			ledger_get_balance(task->ledger,
5997 			    task_ledgers.neural_footprint_compressed,
5998 			    &vm_info->ledger_tag_neural_footprint_compressed);
5999 			ledger_get_balance(task->ledger,
6000 			    task_ledgers.neural_nofootprint,
6001 			    &vm_info->ledger_tag_neural_nofootprint);
6002 			ledger_get_balance(task->ledger,
6003 			    task_ledgers.neural_nofootprint_compressed,
6004 			    &vm_info->ledger_tag_neural_nofootprint_compressed);
6005 			*task_info_count = TASK_VM_INFO_REV3_COUNT;
6006 		}
6007 		if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
6008 			if (get_bsdtask_info(task)) {
6009 				vm_info->limit_bytes_remaining =
6010 				    memorystatus_available_memory_internal(get_bsdtask_info(task));
6011 			} else {
6012 				vm_info->limit_bytes_remaining = 0;
6013 			}
6014 			*task_info_count = TASK_VM_INFO_REV4_COUNT;
6015 		}
6016 		if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
6017 			thread_t thread;
6018 			uint64_t total = task->decompressions;
6019 			queue_iterate(&task->threads, thread, thread_t, task_threads) {
6020 				total += thread->decompressions;
6021 			}
6022 			vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
6023 			*task_info_count = TASK_VM_INFO_REV5_COUNT;
6024 		}
6025 		if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
6026 			ledger_get_balance(task->ledger, task_ledgers.swapins,
6027 			    &vm_info->ledger_swapins);
6028 			*task_info_count = TASK_VM_INFO_REV6_COUNT;
6029 		}
6030 
6031 		break;
6032 	}
6033 
6034 	case TASK_WAIT_STATE_INFO:
6035 	{
6036 		/*
6037 		 * Deprecated flavor. Currently allowing some results until all users
6038 		 * stop calling it. The results may not be accurate.
6039 		 */
6040 		task_wait_state_info_t  wait_state_info;
6041 		uint64_t total_sfi_ledger_val = 0;
6042 
6043 		if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
6044 			error = KERN_INVALID_ARGUMENT;
6045 			break;
6046 		}
6047 
6048 		wait_state_info = (task_wait_state_info_t) task_info_out;
6049 
6050 		wait_state_info->total_wait_state_time = 0;
6051 		bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
6052 
6053 #if CONFIG_SCHED_SFI
6054 		int i, prev_lentry = -1;
6055 		int64_t  val_credit, val_debit;
6056 
6057 		for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
6058 			val_credit = 0;
6059 			/*
6060 			 * checking with prev_lentry != entry ensures adjacent classes
6061 			 * which share the same ledger do not add wait times twice.
6062 			 * Note: Use ledger() call to get data for each individual sfi class.
6063 			 */
6064 			if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
6065 			    KERN_SUCCESS == ledger_get_entries(task->ledger,
6066 			    task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
6067 				total_sfi_ledger_val += val_credit;
6068 			}
6069 			prev_lentry = task_ledgers.sfi_wait_times[i];
6070 		}
6071 
6072 #endif /* CONFIG_SCHED_SFI */
6073 		wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
6074 		*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
6075 
6076 		break;
6077 	}
6078 	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
6079 	{
6080 #if DEVELOPMENT || DEBUG
6081 		pvm_account_info_t      acnt_info;
6082 
6083 		if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
6084 			error = KERN_INVALID_ARGUMENT;
6085 			break;
6086 		}
6087 
6088 		if (task_info_out == NULL) {
6089 			error = KERN_INVALID_ARGUMENT;
6090 			break;
6091 		}
6092 
6093 		acnt_info = (pvm_account_info_t) task_info_out;
6094 
6095 		error = vm_purgeable_account(task, acnt_info);
6096 
6097 		*task_info_count = PVM_ACCOUNT_INFO_COUNT;
6098 
6099 		break;
6100 #else /* DEVELOPMENT || DEBUG */
6101 		error = KERN_NOT_SUPPORTED;
6102 		break;
6103 #endif /* DEVELOPMENT || DEBUG */
6104 	}
6105 	case TASK_FLAGS_INFO:
6106 	{
6107 		task_flags_info_t               flags_info;
6108 
6109 		if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
6110 			error = KERN_INVALID_ARGUMENT;
6111 			break;
6112 		}
6113 
6114 		flags_info = (task_flags_info_t)task_info_out;
6115 
6116 		/* only publish the 64-bit flag of the task */
6117 		flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
6118 
6119 		*task_info_count = TASK_FLAGS_INFO_COUNT;
6120 		break;
6121 	}
6122 
6123 	case TASK_DEBUG_INFO_INTERNAL:
6124 	{
6125 #if DEVELOPMENT || DEBUG
6126 		task_debug_info_internal_t dbg_info;
6127 		ipc_space_t space = task->itk_space;
6128 		if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
6129 			error = KERN_NOT_SUPPORTED;
6130 			break;
6131 		}
6132 
6133 		if (task_info_out == NULL) {
6134 			error = KERN_INVALID_ARGUMENT;
6135 			break;
6136 		}
6137 		dbg_info = (task_debug_info_internal_t) task_info_out;
6138 		dbg_info->ipc_space_size = 0;
6139 
6140 		if (space) {
6141 			smr_ipc_enter();
6142 			ipc_entry_table_t table = smr_entered_load(&space->is_table);
6143 			if (table) {
6144 				dbg_info->ipc_space_size =
6145 				    ipc_entry_table_count(table);
6146 			}
6147 			smr_ipc_leave();
6148 		}
6149 
6150 		dbg_info->suspend_count = task->suspend_count;
6151 
6152 		error = KERN_SUCCESS;
6153 		*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
6154 		break;
6155 #else /* DEVELOPMENT || DEBUG */
6156 		error = KERN_NOT_SUPPORTED;
6157 		break;
6158 #endif /* DEVELOPMENT || DEBUG */
6159 	}
6160 	case TASK_SUSPEND_STATS_INFO:
6161 	{
6162 #if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6163 		if (*task_info_count < TASK_SUSPEND_STATS_INFO_COUNT || task_info_out == NULL) {
6164 			error = KERN_INVALID_ARGUMENT;
6165 			break;
6166 		}
6167 		error = _task_get_suspend_stats_locked(task, (task_suspend_stats_t)task_info_out);
6168 		*task_info_count = TASK_SUSPEND_STATS_INFO_COUNT;
6169 		break;
6170 #else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6171 		error = KERN_NOT_SUPPORTED;
6172 		break;
6173 #endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6174 	}
6175 	case TASK_SUSPEND_SOURCES_INFO:
6176 	{
6177 #if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6178 		if (*task_info_count < TASK_SUSPEND_SOURCES_INFO_COUNT || task_info_out == NULL) {
6179 			error = KERN_INVALID_ARGUMENT;
6180 			break;
6181 		}
6182 		error = _task_get_suspend_sources_locked(task, (task_suspend_source_t)task_info_out);
6183 		*task_info_count = TASK_SUSPEND_SOURCES_INFO_COUNT;
6184 		break;
6185 #else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6186 		error = KERN_NOT_SUPPORTED;
6187 		break;
6188 #endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6189 	}
6190 	default:
6191 		error = KERN_INVALID_ARGUMENT;
6192 	}
6193 
6194 	task_unlock(task);
6195 	return error;
6196 }
6197 
6198 /*
6199  * task_info_from_user
6200  *
6201  * When calling task_info from user space,
6202  * this function will be executed as mig server side
6203  * instead of calling directly into task_info.
6204  * This gives the possibility to perform more security
6205  * checks on task_port.
6206  *
6207  * In the case of TASK_DYLD_INFO, we require the more
6208  * privileged task_read_port not the less-privileged task_name_port.
6209  *
6210  */
6211 kern_return_t
task_info_from_user(mach_port_t task_port,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)6212 task_info_from_user(
6213 	mach_port_t             task_port,
6214 	task_flavor_t           flavor,
6215 	task_info_t             task_info_out,
6216 	mach_msg_type_number_t  *task_info_count)
6217 {
6218 	task_t task;
6219 	kern_return_t ret;
6220 
6221 	if (flavor == TASK_DYLD_INFO) {
6222 		task = convert_port_to_task_read(task_port);
6223 	} else {
6224 		task = convert_port_to_task_name(task_port);
6225 	}
6226 
6227 	ret = task_info(task, flavor, task_info_out, task_info_count);
6228 
6229 	task_deallocate(task);
6230 
6231 	return ret;
6232 }
6233 
6234 /*
6235  * Routine: task_dyld_process_info_update_helper
6236  *
6237  * Release send rights in release_ports.
6238  *
6239  * If no active ports found in task's dyld notifier array, unset the magic value
6240  * in user space to indicate so.
6241  *
6242  * Condition:
6243  *      task's itk_lock is locked, and is unlocked upon return.
6244  *      Global g_dyldinfo_mtx is locked, and is unlocked upon return.
6245  */
void
task_dyld_process_info_update_helper(
	task_t                  task,
	size_t                  active_count,
	vm_map_address_t        magic_addr,    /* a userspace address */
	ipc_port_t             *release_ports,
	size_t                  release_count)
{
	void *notifiers_ptr = NULL;

	/* release_ports is a caller-stack array of at most this many slots */
	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);

	if (active_count == 0) {
		/*
		 * No active notifier ports remain: detach and free the
		 * notifier array, then clear the userspace magic value so
		 * dyld knows nobody is listening anymore.
		 */
		assert(task->itk_dyld_notify != NULL);
		notifiers_ptr = task->itk_dyld_notify;
		task->itk_dyld_notify = NULL;
		itk_unlock(task);

		kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
		(void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
	} else {
		itk_unlock(task);
		/* Still have listeners: (re)assert the magic value in userspace. */
		(void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
		    magic_addr);     /* reset magic */
	}

	lck_mtx_unlock(&g_dyldinfo_mtx);

	/*
	 * Drop the send rights the caller collected (e.g. dead ports pruned
	 * from the notifier array), now that all locks have been released.
	 */
	for (size_t i = 0; i < release_count; i++) {
		ipc_port_release_send(release_ports[i]);
	}
}
6278 
6279 /*
6280  * Routine: task_dyld_process_info_notify_register
6281  *
6282  * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
6283  * memory for the array if it's the first port to be registered. Also cleanup
6284  * any dead rights found in the array.
6285  *
6286  * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
6287  *
6288  * Args:
6289  *     task:   Target task for the registration.
6290  *     sright: A send right.
6291  *
6292  * Returns:
6293  *     KERN_SUCCESS: Registration succeeded.
6294  *     KERN_INVALID_TASK: task is invalid.
6295  *     KERN_INVALID_RIGHT: sright is invalid.
6296  *     KERN_DENIED: Security policy denied this call.
6297  *     KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
6298  *     KERN_NO_SPACE: No available notifier port slot left for this task.
6299  *     KERN_RIGHT_EXISTS: The notifier port is already registered and active.
6300  *
6301  *     Other error code see task_info().
6302  *
6303  * See Also:
6304  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6305  */
kern_return_t
task_dyld_process_info_notify_register(
	task_t                  task,
	ipc_port_t              sright)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	mach_vm_address_t ports_addr; /* a user space address */
	kern_return_t kr;
	boolean_t right_exists = false;
	ipc_port_t *notifiers_ptr = NULL;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!IP_VALID(sright)) {
		return KERN_INVALID_RIGHT;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* Locate dyld's all_image_infos structure in the target's address space. */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/* Compute the user address of the notify-ports array for the 32/64-bit layout. */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

retry:
	/*
	 * Pre-allocate the notifier array while unlocked (Z_WAITOK may block);
	 * it is installed below only if the task still lacks one once we hold
	 * the locks.
	 */
	if (task->itk_dyld_notify == NULL) {
		notifiers_ptr = kalloc_type(ipc_port_t,
		    DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	}

	/* Lock order: g_dyldinfo_mtx before itk_lock. */
	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		if (notifiers_ptr == NULL) {
			/*
			 * We skipped allocating because an array existed at the
			 * retry label, but it was freed before we got the locks;
			 * drop the locks and start over to allocate one.
			 */
			itk_unlock(task);
			lck_mtx_unlock(&g_dyldinfo_mtx);
			goto retry;
		}
		task->itk_dyld_notify = notifiers_ptr;
		notifiers_ptr = NULL; /* ownership transferred to the task */
	}

	assert(task->itk_dyld_notify != NULL);
	/* First pass: clear dead names and check for duplicate registration */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
			/* Dead port: queue it for release outside the locks. */
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		} else if (*portp == sright) {
			/* the port is already registered and is active */
			right_exists = true;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	if (right_exists) {
		/* skip second pass */
		kr = KERN_RIGHT_EXISTS;
		goto out;
	}

	/* Second pass: register the port */
	kr = KERN_NO_SPACE;
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == IPC_PORT_NULL) {
			*portp = sright;
			active_count++;
			kr = KERN_SUCCESS;
			break;
		}
	}

out:
	/* Every exit path leaves at least one active slot (sright or a full table). */
	assert(active_count > 0);

	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Non-NULL only if another thread installed an array before we could. */
	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);

	return kr;
}
6414 
6415 /*
6416  * Routine: task_dyld_process_info_notify_deregister
6417  *
6418  * Remove a send right in target task's itk_dyld_notify array matching the receive
6419  * right name passed in. Deallocate kernel memory for the array if it's the last port to
6420  * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
6421  *
6422  * Does not consume any reference.
6423  *
6424  * Args:
6425  *     task: Target task for the deregistration.
6426  *     rcv_name: The name denoting the receive right in caller's space.
6427  *
6428  * Returns:
 *     KERN_SUCCESS: A matching entry was found and deregistration succeeded.
6430  *     KERN_INVALID_TASK: task is invalid.
6431  *     KERN_INVALID_NAME: name is invalid.
6432  *     KERN_DENIED: Security policy denied this call.
6433  *     KERN_FAILURE: A matching entry is not found.
6434  *     KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6435  *
6436  *     Other error code see task_info().
6437  *
6438  * See Also:
6439  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6440  */
kern_return_t
task_dyld_process_info_notify_deregister(
	task_t                  task,
	mach_port_name_t        rcv_name)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	boolean_t port_found = false;
	mach_vm_address_t ports_addr; /* a user space address */
	ipc_port_t sright;
	kern_return_t kr;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!MACH_PORT_VALID(rcv_name)) {
		return KERN_INVALID_NAME;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* Locate dyld's all_image_infos to recompute the notify-ports user address. */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/* Resolve the caller's receive right name to the underlying port. */
	kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
	if (kr) {
		return KERN_INVALID_RIGHT;
	}

	/* Take our own reference before dropping the port lock held by translate. */
	ip_reference(sright);
	ip_mq_unlock(sright);

	assert(sright != IPC_PORT_NULL);

	/* Lock order: g_dyldinfo_mtx before itk_lock (matches register path). */
	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		/* Nothing was ever registered (or the array is already gone). */
		itk_unlock(task);
		lck_mtx_unlock(&g_dyldinfo_mtx);
		ip_release(sright);
		return KERN_FAILURE;
	}

	/* Single pass: remove the matching port and scrub any dead ports found. */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == sright) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
			port_found = true;
		} else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Drop the reference taken after translation above. */
	ip_release(sright);

	return port_found ? KERN_SUCCESS : KERN_FAILURE;
}
6527 
6528 /*
6529  *	task_power_info
6530  *
6531  *	Returns power stats for the task.
6532  *	Note: Called with task locked.
6533  */
void
task_power_info_locked(
	task_t                        task,
	task_power_info_t             info,
	gpu_energy_data_t             ginfo,
	task_power_info_v2_t          infov2,
	struct task_power_info_extra *extra_info)
{
	thread_t                thread;
	ledger_amount_t         tmp;

	uint64_t                runnable_time_sum = 0;

	/* Caller must hold the task lock (see header comment above). */
	task_lock_assert_owned(task);

	/* Task-wide wakeup counters come from the task's ledger. */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	/* Start from the task-level (terminated threads) totals ... */
	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	struct recount_usage usage = { 0 };
	struct recount_usage usage_perf = { 0 };
	recount_task_usage_perf_only(task, &usage, &usage_perf);

	info->total_user = usage.ru_metrics[RCT_LVL_USER].rm_time_mach;
	info->total_system = recount_usage_system_time_mach(&usage);
	runnable_time_sum = task->total_runnable_time;

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		infov2->task_ptime = recount_usage_time_mach(&usage_perf);
		infov2->task_pset_switches = task->ps_switch;
#if CONFIG_PERVASIVE_ENERGY
		infov2->task_energy = usage.ru_energy_nj;
#endif /* CONFIG_PERVASIVE_ENERGY */
	}

	/* ... then accumulate the per-thread contributions of live threads. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;

		/* Idle threads don't contribute to task power stats. */
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

		if (infov2) {
			infov2->task_pset_switches += thread->ps_switch;
		}

		runnable_time_sum += timer_grab(&thread->runnable_timer);

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}

	/* Optional extended metrics (caller passes NULL when not wanted). */
	if (extra_info) {
		extra_info->runnable_time = runnable_time_sum;
#if CONFIG_PERVASIVE_CPI
		extra_info->cycles = recount_usage_cycles(&usage);
		extra_info->instructions = recount_usage_instructions(&usage);
		extra_info->pcycles = recount_usage_cycles(&usage_perf);
		extra_info->pinstructions = recount_usage_instructions(&usage_perf);
		extra_info->user_ptime = usage_perf.ru_metrics[RCT_LVL_USER].rm_time_mach;
		extra_info->system_ptime = recount_usage_system_time_mach(&usage_perf);
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
		extra_info->energy = usage.ru_energy_nj;
		extra_info->penergy = usage_perf.ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
#if RECOUNT_SECURE_METRICS
		/* Secure-world times are only exposed on debug-enabled devices. */
		if (PE_i_can_has_debugger(NULL)) {
			extra_info->secure_time = usage.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
			extra_info->secure_ptime = usage_perf.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
		}
#endif // RECOUNT_SECURE_METRICS
	}
}
6625 
6626 /*
6627  *	task_gpu_utilisation
6628  *
6629  *	Returns the total gpu time used by the all the threads of the task
6630  *  (both dead and alive)
6631  */
uint64_t
task_gpu_utilisation(
	task_t  task)
{
	uint64_t gpu_time = 0;
#if defined(__x86_64__)
	thread_t thread;

	task_lock(task);
	/* Time already accounted to the task (e.g. from exited threads). */
	gpu_time += task->task_gpu_ns;

	/* Add the live per-thread GPU time under each thread's lock. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;
		x = splsched();
		thread_lock(thread);
		gpu_time += ml_gpu_stat(thread);
		thread_unlock(thread);
		splx(x);
	}

	task_unlock(task);
#else /* defined(__x86_64__) */
	/* silence compiler warning */
	(void)task;
#endif /* defined(__x86_64__) */
	/* Returns 0 on non-x86_64 platforms. */
	return gpu_time;
}
6659 
6660 /* This function updates the cpu time in the arrays for each
6661  * effective and requested QoS class
6662  */
void
task_update_cpu_time_qos_stats(
	task_t  task,
	uint64_t *eqos_stats,
	uint64_t *rqos_stats)
{
	/* Nothing to report into; avoid taking the task lock for no reason. */
	if (!eqos_stats && !rqos_stats) {
		return;
	}

	task_lock(task);
	thread_t thread;
	/*
	 * Flush each live thread's pending QoS CPU time into the task-level
	 * accumulators before reading them below.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		thread_update_qos_cpu_time(thread);
	}

	/* Effective-QoS buckets, indexed by THREAD_QOS_* constants. */
	if (eqos_stats) {
		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
	}

	/* Requested-QoS buckets. */
	if (rqos_stats) {
		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
	}

	task_unlock(task);
}
6705 
6706 kern_return_t
task_purgable_info(task_t task,task_purgable_info_t * stats)6707 task_purgable_info(
6708 	task_t                  task,
6709 	task_purgable_info_t    *stats)
6710 {
6711 	if (task == TASK_NULL || stats == NULL) {
6712 		return KERN_INVALID_ARGUMENT;
6713 	}
6714 	/* Take task reference */
6715 	task_reference(task);
6716 	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6717 	/* Drop task reference */
6718 	task_deallocate(task);
6719 	return KERN_SUCCESS;
6720 }
6721 
/*
 * Arm the given vtimer(s) on the task and record, per thread, the baseline
 * time value that task_vtimer_update() will later diff against.
 */
void
task_vtimer_set(
	task_t          task,
	integer_t       which)
{
	thread_t        thread;
	spl_t           x;

	task_lock(task);

	task->vtimers |= which;

	switch (which) {
	case TASK_VTIMER_USER:
		/* Baseline: each thread's current user-mode time. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			struct recount_times_mach times = recount_thread_times(thread);
			thread->vtimer_user_save = times.rtm_user;
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_PROF:
		/* Baseline: each thread's current total time. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_prof_save = recount_thread_time_mach(thread);
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_RLIM:
		/* Baseline: same total-time source as PROF, separate save slot. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_rlim_save = recount_thread_time_mach(thread);
			thread_unlock(thread);
			splx(x);
		}
		break;
	}

	task_unlock(task);
}
6769 
6770 void
task_vtimer_clear(task_t task,integer_t which)6771 task_vtimer_clear(
6772 	task_t          task,
6773 	integer_t       which)
6774 {
6775 	task_lock(task);
6776 
6777 	task->vtimers &= ~which;
6778 
6779 	task_unlock(task);
6780 }
6781 
/*
 * Compute the elapsed vtimer time for the CURRENT thread since the last
 * baseline, returning it (in microseconds) through *microsecs and advancing
 * the baseline.  `task` must be the current task; it is only used to check
 * that the requested vtimer is still armed.
 */
void
task_vtimer_update(
	__unused
	task_t          task,
	integer_t       which,
	uint32_t        *microsecs)
{
	thread_t        thread = current_thread();
	uint32_t        tdelt = 0;
	clock_sec_t     secs = 0;
	uint64_t        tsum;

	assert(task == current_task());

	spl_t s = splsched();
	thread_lock(thread);

	/* Bail out (leaving *microsecs untouched) if the timer isn't armed. */
	if ((task->vtimers & which) != (uint32_t)which) {
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {
	case TASK_VTIMER_USER:;
		/* Delta of user-mode time since the saved baseline. */
		struct recount_times_mach times = recount_thread_times(thread);
		tsum = times.rtm_user;
		tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
		thread->vtimer_user_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0) {
			thread->vtimer_prof_save = tsum;
		}
		break;

	case TASK_VTIMER_RLIM:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}
6835 
6836 uint64_t
get_task_dispatchqueue_offset(task_t task)6837 get_task_dispatchqueue_offset(
6838 	task_t          task)
6839 {
6840 	return task->dispatchqueue_offset;
6841 }
6842 
6843 void
task_synchronizer_destroy_all(task_t task)6844 task_synchronizer_destroy_all(task_t task)
6845 {
6846 	/*
6847 	 *  Destroy owned semaphores
6848 	 */
6849 	semaphore_destroy_all(task);
6850 }
6851 
6852 /*
6853  * Install default (machine-dependent) initial thread state
6854  * on the task.  Subsequent thread creation will have this initial
6855  * state set on the thread by machine_thread_inherit_taskwide().
6856  * Flavors and structures are exactly the same as those to thread_set_state()
6857  */
6858 kern_return_t
task_set_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t state_count)6859 task_set_state(
6860 	task_t task,
6861 	int flavor,
6862 	thread_state_t state,
6863 	mach_msg_type_number_t state_count)
6864 {
6865 	kern_return_t ret;
6866 
6867 	if (task == TASK_NULL) {
6868 		return KERN_INVALID_ARGUMENT;
6869 	}
6870 
6871 	task_lock(task);
6872 
6873 	if (!task->active) {
6874 		task_unlock(task);
6875 		return KERN_FAILURE;
6876 	}
6877 
6878 	ret = machine_task_set_state(task, flavor, state, state_count);
6879 
6880 	task_unlock(task);
6881 	return ret;
6882 }
6883 
6884 /*
6885  * Examine the default (machine-dependent) initial thread state
6886  * on the task, as set by task_set_state().  Flavors and structures
6887  * are exactly the same as those passed to thread_get_state().
6888  */
6889 kern_return_t
task_get_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)6890 task_get_state(
6891 	task_t  task,
6892 	int     flavor,
6893 	thread_state_t state,
6894 	mach_msg_type_number_t *state_count)
6895 {
6896 	kern_return_t ret;
6897 
6898 	if (task == TASK_NULL) {
6899 		return KERN_INVALID_ARGUMENT;
6900 	}
6901 
6902 	task_lock(task);
6903 
6904 	if (!task->active) {
6905 		task_unlock(task);
6906 		return KERN_FAILURE;
6907 	}
6908 
6909 	ret = machine_task_get_state(task, flavor, state, state_count);
6910 
6911 	task_unlock(task);
6912 	return ret;
6913 }
6914 
6915 
/*
 * Deliver an EXC_GUARD exception (with corpse) to the current task.
 * Returns KERN_NOT_SUPPORTED for initproc, otherwise the enqueue result.
 */
static kern_return_t __attribute__((noinline, not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	boolean_t backtrace_only)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid()) {
		return KERN_NOT_SUPPORTED;              // initproc is immune
	}
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;
	void *bsd_info = get_bsdtask_info(task);

	/* (See jetsam-related comments below) */

	/* Exempt from jetsam while the corpse/exception is being generated. */
	proc_memstat_skip(bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason, backtrace_only);
	proc_memstat_skip(bsd_info, FALSE);
	return kr;
}
6943 
6944 kern_return_t
task_violated_guard(mach_exception_code_t code,mach_exception_subcode_t subcode,void * reason,bool backtrace_only)6945 task_violated_guard(
6946 	mach_exception_code_t code,
6947 	mach_exception_subcode_t subcode,
6948 	void *reason,
6949 	bool backtrace_only)
6950 {
6951 	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
6952 }
6953 
6954 
6955 #if CONFIG_MEMORYSTATUS
6956 
6957 boolean_t
task_get_memlimit_is_active(task_t task)6958 task_get_memlimit_is_active(task_t task)
6959 {
6960 	assert(task != NULL);
6961 
6962 	if (task->memlimit_is_active == 1) {
6963 		return TRUE;
6964 	} else {
6965 		return FALSE;
6966 	}
6967 }
6968 
6969 void
task_set_memlimit_is_active(task_t task,boolean_t memlimit_is_active)6970 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6971 {
6972 	assert(task != NULL);
6973 
6974 	if (memlimit_is_active) {
6975 		task->memlimit_is_active = 1;
6976 	} else {
6977 		task->memlimit_is_active = 0;
6978 	}
6979 }
6980 
6981 boolean_t
task_get_memlimit_is_fatal(task_t task)6982 task_get_memlimit_is_fatal(task_t task)
6983 {
6984 	assert(task != NULL);
6985 
6986 	if (task->memlimit_is_fatal == 1) {
6987 		return TRUE;
6988 	} else {
6989 		return FALSE;
6990 	}
6991 }
6992 
6993 void
task_set_memlimit_is_fatal(task_t task,boolean_t memlimit_is_fatal)6994 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6995 {
6996 	assert(task != NULL);
6997 
6998 	if (memlimit_is_fatal) {
6999 		task->memlimit_is_fatal = 1;
7000 	} else {
7001 		task->memlimit_is_fatal = 0;
7002 	}
7003 }
7004 
7005 uint64_t
task_get_dirty_start(task_t task)7006 task_get_dirty_start(task_t task)
7007 {
7008 	return task->memstat_dirty_start;
7009 }
7010 
7011 void
task_set_dirty_start(task_t task,uint64_t start)7012 task_set_dirty_start(task_t task, uint64_t start)
7013 {
7014 	task_lock(task);
7015 	task->memstat_dirty_start = start;
7016 	task_unlock(task);
7017 }
7018 
7019 boolean_t
task_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)7020 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7021 {
7022 	boolean_t triggered = FALSE;
7023 
7024 	assert(task == current_task());
7025 
7026 	/*
7027 	 * Returns true, if task has already triggered an exc_resource exception.
7028 	 */
7029 
7030 	if (memlimit_is_active) {
7031 		triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
7032 	} else {
7033 		triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
7034 	}
7035 
7036 	return triggered;
7037 }
7038 
7039 void
task_mark_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)7040 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7041 {
7042 	assert(task == current_task());
7043 
7044 	/*
7045 	 * We allow one exc_resource per process per active/inactive limit.
7046 	 * The limit's fatal attribute does not come into play.
7047 	 */
7048 
7049 	if (memlimit_is_active) {
7050 		task->memlimit_active_exc_resource = 1;
7051 	} else {
7052 		task->memlimit_inactive_exc_resource = 1;
7053 	}
7054 }
7055 
7056 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
7057 
/*
 * Deliver an EXC_RESOURCE (RESOURCE_TYPE_MEMORY) for the current task after it
 * crossed its memory high watermark or diagnostics limit, optionally taking a
 * user core dump first.  Fatal violations and processes requesting synchronous
 * EXC_RESOURCE are handled inline (suspend/triage/resume); otherwise the
 * exception is enqueued with a corpse fork.
 */
void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options)
{
	task_t                                          task            = current_task();
	int                                                     pid         = 0;
	const char                                      *procname       = "unknown";
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
	boolean_t send_sync_exc_resource = FALSE;
	void *cur_bsd_info = get_bsdtask_info(current_task());

#ifdef MACH_BSD
	pid = proc_selfpid();

	if (pid == 1) {
		/*
		 * Cannot have ReportCrash analyzing
		 * a suspended initproc.
		 */
		return;
	}

	if (cur_bsd_info != NULL) {
		procname = proc_name_address(cur_bsd_info);
		send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(cur_bsd_info);
	}
#endif
#if CONFIG_COREDUMP
	/* Optionally capture a user core dump before raising the exception. */
	if (hwm_user_cores) {
		int                             error;
		uint64_t                starttime, end;
		clock_sec_t             secs = 0;
		uint32_t                microsecs = 0;

		starttime = mach_absolute_time();
		/*
		 * Trigger a coredump of this process. Don't proceed unless we know we won't
		 * be filling up the disk; and ignore the core size resource limit for this
		 * core file.
		 */
		if ((error = coredump(cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
		}
		/*
		 * coredump() leaves the task suspended.
		 */
		task_resume_internal(current_task());

		end = mach_absolute_time();
		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
		    proc_name_address(cur_bsd_info), pid, (int)secs, microsecs);
	}
#endif /* CONFIG_COREDUMP */

	if (disable_exc_resource) {
		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
		    "suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
		return;
	}
	printf("process %s [%d] crossed memory %s (%d MB); EXC_RESOURCE "
	    "\n", procname, pid, (!(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? "high watermark" : "diagnostics limit"), max_footprint_mb);

	/*
	 * A task that has triggered an EXC_RESOURCE, should not be
	 * jetsammed when the device is under memory pressure.  Here
	 * we set the P_MEMSTAT_SKIP flag so that the process
	 * will be skipped if the memorystatus_thread wakes up.
	 *
	 * This is a debugging aid to ensure we can get a corpse before
	 * the jetsam thread kills the process.
	 * Note that proc_memstat_skip is a no-op on release kernels.
	 */
	proc_memstat_skip(cur_bsd_info, TRUE);

	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	/*
	 * Regardless of whether there was a diag memlimit violation, fatal exceptions
	 * shall always be reported with the high-watermark flavor.  In other words, if
	 * there is both a diag limit and a watermark, and the violation is fatal, the
	 * watermark flavor is reported.
	 */
	if (!(exception_options & EXEC_RESOURCE_FATAL)) {
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], !(exception_options & EXEC_RESOURCE_DIAGNOSTIC)  ? FLAVOR_HIGH_WATERMARK : FLAVOR_DIAG_MEMLIMIT);
	} else {
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK );
	}
	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
	/*
	 * Do not generate a corpse fork if the violation is a fatal one
	 * or the process wants synchronous EXC_RESOURCE exceptions.
	 */
	if ((exception_options & EXEC_RESOURCE_FATAL) || send_sync_exc_resource || !exc_via_corpse_forking) {
		if (exception_options & EXEC_RESOURCE_FATAL) {
			vm_map_set_corpse_source(task->map);
		}

		/* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
		if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
			/*
			 * Use the _internal_ variant so that no user-space
			 * process can resume our task from under us.
			 */
			task_suspend_internal(task);
			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
			task_resume_internal(task);
		}
	} else {
		if (disable_exc_resource_during_audio && audio_active) {
			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			    "suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
		} else {
			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
			    code, EXCEPTION_CODE_MAX, NULL, FALSE);
		}
	}

	/*
	 * After the EXC_RESOURCE has been handled, we must clear the
	 * P_MEMSTAT_SKIP flag so that the process can again be
	 * considered for jetsam if the memorystatus_thread wakes up.
	 */
	proc_memstat_skip(cur_bsd_info, FALSE);         /* clear the flag */
}
7181 /*
7182  * Callback invoked when a task exceeds its physical footprint limit.
7183  */
void
task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	ledger_amount_t max_footprint = 0;
	ledger_amount_t max_footprint_mb = 0;
#if DEBUG || DEVELOPMENT
	ledger_amount_t diag_threshold_limit_mb = 0;
	ledger_amount_t diag_threshold_limit = 0;
#endif
#if CONFIG_DEFERRED_RECLAIM
	ledger_amount_t current_footprint;
#endif /* CONFIG_DEFERRED_RECLAIM */
	task_t task;
	send_exec_resource_is_warning is_warning = IS_NOT_WARNING;
	boolean_t memlimit_is_active;
	send_exec_resource_is_fatal memlimit_is_fatal;
	send_exec_resource_is_diagnostics is_diag_mem_threshold = IS_NOT_DIAGNOSTICS;
	/* Classify the ledger callback: diagnostics threshold, warning, or violation. */
	if (warning == LEDGER_WARNING_DIAG_MEM_THRESHOLD) {
		is_diag_mem_threshold = IS_DIAGNOSTICS;
		is_warning = IS_WARNING;
	} else if (warning == LEDGER_WARNING_DIPPED_BELOW) {
		/*
		 * Task memory limits only provide a warning on the way up.
		 */
		return;
	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
		/*
		 * This task is in danger of violating a memory limit,
		 * It has exceeded a percentage level of the limit.
		 */
		is_warning = IS_WARNING;
	} else {
		/*
		 * The task has exceeded the physical footprint limit.
		 * This is not a warning but a true limit violation.
		 */
		is_warning = IS_NOT_WARNING;
	}

	task = current_task();

	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
#if DEBUG || DEVELOPMENT
	ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &diag_threshold_limit);
#endif
#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		/*
		 * Task is enrolled in deferred reclamation.
		 * Do a reclaim to ensure it's really over its limit.
		 */
		vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
		ledger_get_balance(task->ledger, task_ledgers.phys_footprint, &current_footprint);
		if (current_footprint < max_footprint) {
			/* Reclaim brought us back under the limit; no exception needed. */
			return;
		}
	}
#endif /* CONFIG_DEFERRED_RECLAIM */
	max_footprint_mb = max_footprint >> 20;
#if DEBUG || DEVELOPMENT
	diag_threshold_limit_mb = diag_threshold_limit >> 20;
#endif
	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task) == FALSE ? IS_NOT_FATAL : IS_FATAL;
	/* Diagnostics-threshold handling only exists on DEBUG/DEVELOPMENT kernels. */
#if DEBUG || DEVELOPMENT
	if (is_diag_mem_threshold == IS_NOT_DIAGNOSTICS) {
		task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
	} else {
		task_process_crossed_limit_diag(diag_threshold_limit_mb);
	}
#else
	task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
#endif
}
7258 
7259 /*
 * Actions to perform when a process has crossed a watermark or has a fatal consumption violation. */
static inline void
task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning)
{
	send_exec_resource_options_t exception_options = 0;
	if (memlimit_is_fatal) {
		exception_options |= EXEC_RESOURCE_FATAL;
	}
	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * We only generate the exception once per process per memlimit (active/inactive limit).
	 * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
	 * and we disable it by marking that memlimit as exception triggered.
	 */
	if (is_warning == IS_NOT_WARNING && !task_has_triggered_exc_resource(task, memlimit_is_active)) {
		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
		// Since this was a memory limit (not a diag threshold), suppress further
		// signalling; a diag limit could instead be re-armed with a new value and
		// signal the violation again.
		memorystatus_log_exception((int)ledger_limit_size, memlimit_is_active, memlimit_is_fatal);
		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
	}
	/* Always notify memorystatus, whether this was a warning or a violation. */
	memorystatus_on_ledger_footprint_exceeded(is_warning == IS_NOT_WARNING ? FALSE : TRUE, memlimit_is_active, memlimit_is_fatal);
}
7283 
#if DEBUG || DEVELOPMENT
/**
 * Actions to take when a process has crossed the diagnostics limit.
 *
 * Unlike the regular memlimit path, no one-shot latch is kept here: the
 * inhibit / rearm mechanism is performed at the ledger level, so the
 * exception may fire again after the ledger rearms the threshold.
 */
static inline void
task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size)
{
	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * In the case of the diagnostics thresholds, the exception will be signaled only once, but the
	 * inhibit / rearm mechanism if performed at ledger level.
	 */
	send_exec_resource_options_t exception_options = EXEC_RESOURCE_DIAGNOSTIC;
	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
	memorystatus_log_diag_threshold_exception((int)ledger_limit_size);
}
#endif
7301 
7302 extern int proc_check_footprint_priv(void);
7303 
/*
 * Privileged entry point to set a task's physical footprint limit (in MB).
 *
 * Performs a footprint-privilege check, then forwards to
 * task_set_phys_footprint_limit_internal() with the task's current
 * active/fatal memlimit attributes. Any privilege-check error is collapsed
 * to KERN_NO_ACCESS.
 */
kern_return_t
task_set_phys_footprint_limit(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb)
{
	kern_return_t error;

	boolean_t memlimit_is_active;
	boolean_t memlimit_is_fatal;

	if ((error = proc_check_footprint_priv())) {
		return KERN_NO_ACCESS;
	}

	/*
	 * This call should probably be obsoleted.
	 * But for now, we default to current state.
	 */
	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task);

	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
}
7328 
/*
 * Set the limit of diagnostics memory consumption for a concrete task
 * (DEVELOPMENT/DEBUG kernels only). Performs the same footprint-privilege
 * check as task_set_phys_footprint_limit(), collapsing failures to
 * KERN_NO_ACCESS, then forwards to the internal implementation.
 */
#if CONFIG_MEMORYSTATUS
#if DEVELOPMENT || DEBUG
kern_return_t
task_set_diag_footprint_limit(
	task_t task,
	uint64_t new_limit_mb,
	uint64_t *old_limit_mb)
{
	kern_return_t error;

	if ((error = proc_check_footprint_priv())) {
		return KERN_NO_ACCESS;
	}

	return task_set_diag_footprint_limit_internal(task, new_limit_mb, old_limit_mb);
}

#endif // DEVELOPMENT || DEBUG
#endif // CONFIG_MEMORYSTATUS
7351 
7352 kern_return_t
task_convert_phys_footprint_limit(int limit_mb,int * converted_limit_mb)7353 task_convert_phys_footprint_limit(
7354 	int limit_mb,
7355 	int *converted_limit_mb)
7356 {
7357 	if (limit_mb == -1) {
7358 		/*
7359 		 * No limit
7360 		 */
7361 		if (max_task_footprint != 0) {
7362 			*converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);         /* bytes to MB */
7363 		} else {
7364 			*converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
7365 		}
7366 	} else {
7367 		/* nothing to convert */
7368 		*converted_limit_mb = limit_mb;
7369 	}
7370 	return KERN_SUCCESS;
7371 }
7372 
/*
 * Core implementation for setting a task's physical footprint limit.
 *
 * new_limit_mb == -1 removes the limit (reverting to the global default,
 * or infinity). Otherwise the ledger limit is set to new_limit_mb MB with
 * the standard warning level. On DEVELOPMENT/DEBUG kernels the diagnostics
 * memory threshold is disabled while it coincides with the new limit and
 * re-enabled when it no longer does.
 *
 * Returns the previous limit (in MB) through old_limit_mb when non-NULL.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal)
{
	ledger_amount_t old;
	kern_return_t ret;
#if DEVELOPMENT || DEBUG
	diagthreshold_check_return diag_threshold_validity;
#endif
	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}
	/**
	 * Maybe we will need to re-enable the diag threshold, lets get the value
	 * and the current status
	 */
#if DEVELOPMENT || DEBUG
	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_mb, false);
	/**
	 * If the footprint and diagnostics threshold are going to be same, lets disable the threshold
	 */
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}
#endif

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

	if (old_limit_mb) {
		*old_limit_mb = (int)(old >> 20);
	}

	if (new_limit_mb == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
		    max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);

		task_lock(task);
		task_set_memlimit_is_active(task, memlimit_is_active);
		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
		task_unlock(task);
		/**
		 * If the diagnostics were disabled, and now we have a new limit, we have to re-enable it.
		 */
#if DEVELOPMENT || DEBUG
		if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
			ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
		} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
			ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
		}
	#endif
		return KERN_SUCCESS;
	}

	/* With monitors compiled out, report success without applying the limit. */
#ifdef CONFIG_NOMONITORS
	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

	task_lock(task);

	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
		/*
		 * memlimit state is not changing
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	task_set_memlimit_is_active(task, memlimit_is_active);
	task_set_memlimit_is_fatal(task, memlimit_is_fatal);

	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

	/* The current task may already be over the new limit; trigger checks now. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);
#if DEVELOPMENT || DEBUG
	if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}
	#endif

	return KERN_SUCCESS;
}
7479 
#if RESETTABLE_DIAG_FOOTPRINT_LIMITS
/*
 * Core implementation for setting a task's diagnostics memory threshold
 * (in bytes, despite the sibling limit APIs being MB-based).
 *
 * new_limit_bytes == -1 (note: compared against a uint64_t, so this is
 * really UINT64_MAX) removes the threshold, setting it to infinity and
 * re-enabling the diag flag. Otherwise the ledger threshold is set and the
 * diag flag is toggled depending on whether the threshold coincides with
 * the footprint limit.
 *
 * Returns the previous threshold (bytes) through old_limit_bytes when
 * non-NULL.
 */
kern_return_t
task_set_diag_footprint_limit_internal(
	task_t task,
	uint64_t new_limit_bytes,
	uint64_t *old_limit_bytes)
{
	ledger_amount_t old = 0;
	kern_return_t ret = KERN_SUCCESS;
	diagthreshold_check_return diag_threshold_validity;
	ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}
	/**
	 * Maybe we will need to re-enable the diag threshold, lets get the value
	 * and the current status
	 */
	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_bytes >> 20, true);
	/**
	 * If the footprint and diagnostics threshold are going to be same, lets disable the threshold
	 */
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	if (old_limit_bytes) {
		*old_limit_bytes = old;
	}

	if (new_limit_bytes == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
		    LEDGER_LIMIT_INFINITY);
		/*
		 * If the memory diagnostics flag was disabled, lets enable it again
		 */
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
		return KERN_SUCCESS;
	}

	/* With monitors compiled out, report success without applying the threshold. */
#ifdef CONFIG_NOMONITORS
	return KERN_SUCCESS;
#else

	task_lock(task);
	ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_bytes );
	/* The current task may already be over the new threshold; trigger checks now. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}

	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */
}
7551 
7552 kern_return_t
task_get_diag_footprint_limit_internal(task_t task,uint64_t * new_limit_bytes,bool * threshold_disabled)7553 task_get_diag_footprint_limit_internal(
7554 	task_t task,
7555 	uint64_t *new_limit_bytes,
7556 	bool *threshold_disabled)
7557 {
7558 	ledger_amount_t ledger_limit;
7559 	kern_return_t ret = KERN_SUCCESS;
7560 	if (new_limit_bytes == NULL || threshold_disabled == NULL) {
7561 		return KERN_INVALID_ARGUMENT;
7562 	}
7563 	ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &ledger_limit);
7564 	if (ledger_limit == LEDGER_LIMIT_INFINITY) {
7565 		ledger_limit = -1;
7566 	}
7567 	if (ret == KERN_SUCCESS) {
7568 		*new_limit_bytes = ledger_limit;
7569 		ret = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, threshold_disabled);
7570 	}
7571 	return ret;
7572 }
7573 #endif /* RESETTABLE_DIAG_FOOTPRINT_LIMITS */
7574 
7575 
/*
 * Read back a task's physical footprint limit, converted to MB.
 * Returns whatever error ledger_get_limit() reports.
 */
kern_return_t
task_get_phys_footprint_limit(
	task_t task,
	int *limit_mb)
{
	ledger_amount_t limit;
	kern_return_t ret;

	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
	*limit_mb = (int)(limit >> 20);

	return KERN_SUCCESS;
}
#else /* CONFIG_MEMORYSTATUS */
/*
 * Stubs for kernels built without CONFIG_MEMORYSTATUS: footprint limits
 * are not supported, so both entry points simply fail.
 */
kern_return_t
task_set_phys_footprint_limit(
	__unused task_t task,
	__unused int new_limit_mb,
	__unused int *old_limit_mb)
{
	return KERN_FAILURE;
}

kern_return_t
task_get_phys_footprint_limit(
	__unused task_t task,
	__unused int *limit_mb)
{
	return KERN_FAILURE;
}
#endif /* CONFIG_MEMORYSTATUS */
7617 
/* Return a pointer to the task's security token in its read-only data. */
security_token_t *
task_get_sec_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.sec_token;
}
7623 
/*
 * Update the task's security token. The token lives in read-only zone
 * memory, so the write must go through zalloc_ro_update_field().
 */
void
task_set_sec_token(task_t task, security_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.sec_token, token);
}
7630 
/* Return a pointer to the task's audit token in its read-only data. */
audit_token_t *
task_get_audit_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.audit_token;
}
7636 
/*
 * Update the task's audit token. The token lives in read-only zone
 * memory, so the write must go through zalloc_ro_update_field().
 */
void
task_set_audit_token(task_t task, audit_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.audit_token, token);
}
7643 
/*
 * Update both the security and audit tokens in one read-only-zone write,
 * by copying the current token struct, patching it, and writing it back.
 */
void
task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
{
	struct task_token_ro_data tokens;

	tokens = task_get_ro(task)->task_tokens;
	tokens.sec_token = *sec_token;
	tokens.audit_token = *audit_token;

	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
	    &tokens);
}
7656 
/* A task is "privileged" when the first word of its security token is 0 (root). */
boolean_t
task_is_privileged(task_t task)
{
	return task_get_sec_token(task)->val[0] == 0;
}
7662 
#ifdef CONFIG_MACF
/* Return the MACF Mach-trap filter bitmask stored in the task's read-only data. */
uint8_t *
task_get_mach_trap_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_trap_filter_mask;
}
7669 
/*
 * Install the MACF Mach-trap filter bitmask pointer. The field lives in
 * read-only zone memory, so the write goes through zalloc_ro_update_field().
 */
void
task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_trap_filter_mask, &mask);
}
7676 
/* Return the MACF kobject filter bitmask stored in the task's read-only data. */
uint8_t *
task_get_mach_kobj_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_kobj_filter_mask;
}
7682 
/* Return the task's all-image-info address (dyld info base). */
mach_vm_address_t
task_get_all_image_info_addr(task_t task)
{
	return task->all_image_info_addr;
}
7688 
/*
 * Install the MACF kobject filter bitmask pointer. The field lives in
 * read-only zone memory, so the write goes through zalloc_ro_update_field().
 */
void
task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_kobj_filter_mask, &mask);
}

#endif /* CONFIG_MACF */
7697 
7698 void
task_set_thread_limit(task_t task,uint16_t thread_limit)7699 task_set_thread_limit(task_t task, uint16_t thread_limit)
7700 {
7701 	assert(task != kernel_task);
7702 	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7703 		task_lock(task);
7704 		task->task_thread_limit = thread_limit;
7705 		task_unlock(task);
7706 	}
7707 }
7708 
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Set soft/hard limits on the task's IPC port-space table size;
 * thin wrapper over ipc_space_set_table_size_limits().
 */
kern_return_t
task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
{
	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
7716 
#if XNU_TARGET_OS_OSX
/*
 * Report whether the task has the system-version-compat shim enabled.
 * NOTE(review): returns the raw TF_SYS_VERSION_COMPAT mask value (nonzero
 * when set) as a boolean_t rather than a normalized TRUE/FALSE — callers
 * appear to treat it as truthy only.
 */
boolean_t
task_has_system_version_compat_enabled(task_t task)
{
	boolean_t enabled = FALSE;

	task_lock(task);
	enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
	task_unlock(task);

	return enabled;
}
7729 
/*
 * Enable or disable the system-version-compat shim flag on the calling
 * task. Only valid on the current (non-kernel) task.
 */
void
task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
{
	assert(task == current_task());
	assert(task != kernel_task);

	task_lock(task);
	if (enable_system_version_compat) {
		task->t_flags |= TF_SYS_VERSION_COMPAT;
	} else {
		task->t_flags &= ~TF_SYS_VERSION_COMPAT;
	}
	task_unlock(task);
}
#endif /* XNU_TARGET_OS_OSX */
7745 
7746 /*
7747  * We need to export some functions to other components that
7748  * are currently implemented in macros within the osfmk
7749  * component.  Just export them as functions of the same name.
7750  */
7751 boolean_t
is_kerneltask(task_t t)7752 is_kerneltask(task_t t)
7753 {
7754 	if (t == kernel_task) {
7755 		return TRUE;
7756 	}
7757 
7758 	return FALSE;
7759 }
7760 
/* Exported wrapper: is this task a corpse fork? */
boolean_t
is_corpsefork(task_t t)
{
	return task_is_a_corpse_fork(t);
}
7766 
/*
 * Variant of current_task() that is safe in early boot: before
 * STARTUP_SUB_EARLY_BOOT the current thread's read-only data (t_tro)
 * may not exist yet, in which case TASK_NULL is returned instead of
 * dereferencing it.
 */
task_t
current_task_early(void)
{
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		if (current_thread()->t_tro == NULL) {
			return TASK_NULL;
		}
	}
	return get_threadtask(current_thread());
}
7777 
/* Return the task of the calling thread. */
task_t
current_task(void)
{
	return get_threadtask(current_thread());
}
7783 
7784 /* defined in bsd/kern/kern_prot.c */
7785 extern int get_audit_token_pid(audit_token_t *audit_token);
7786 
7787 int
task_pid(task_t task)7788 task_pid(task_t task)
7789 {
7790 	if (task) {
7791 		return get_audit_token_pid(task_get_audit_token(task));
7792 	}
7793 	return -1;
7794 }
7795 
#if __has_feature(ptrauth_calls)
/*
 * Get the shared region id and jop signing key for the task.
 * The function will allocate a kalloc buffer and return
 * it to caller, the caller needs to free it. This is used
 * for getting the information via task port.
 */
char *
task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
{
	size_t len;
	char *shared_region_id = NULL;

	task_lock(task);
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		return NULL;
	}
	len = strlen(task->shared_region_id) + 1;

	/* don't hold task lock while allocating */
	task_unlock(task);
	shared_region_id = kalloc_data(len, Z_WAITOK);
	task_lock(task);

	/* re-check: the id may have been cleared while the lock was dropped */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		kfree_data(shared_region_id, len);
		return NULL;
	}
	assert(len == strlen(task->shared_region_id) + 1);         /* should never change */
	strlcpy(shared_region_id, task->shared_region_id, len);
	task_unlock(task);

	/* find key from its auth pager */
	if (jop_pid != NULL) {
		*jop_pid = shared_region_find_key(shared_region_id);
	}

	return shared_region_id;
}
7837 
/*
 * set the shared region id for a task
 *
 * Takes ownership of `id` (may be NULL); releases the previously installed
 * id's signing key and frees its string. Also clears the auth-remapped
 * flag since the new region has not been remapped yet.
 */
void
task_set_shared_region_id(task_t task, char *id)
{
	char *old_id;

	task_lock(task);
	old_id = task->shared_region_id;
	task->shared_region_id = id;
	task->shared_region_auth_remapped = FALSE;
	task_unlock(task);

	/* free any pre-existing shared region id */
	if (old_id != NULL) {
		shared_region_key_dealloc(old_id);
		kfree_data(old_id, strlen(old_id) + 1);
	}
}
#endif /* __has_feature(ptrauth_calls) */
7859 
/*
 * This routine finds a thread in a task by its unique id
 * Returns a referenced thread or THREAD_NULL if the thread was not found
 *
 * TODO: This is super inefficient - it's an O(threads in task) list walk!
 *       We should make a tid hash, or transition all tid clients to thread ports
 *
 * Precondition: No locks held (will take task lock)
 */
thread_t
task_findtid(task_t task, uint64_t tid)
{
	thread_t self           = current_thread();
	thread_t found_thread   = THREAD_NULL;
	thread_t iter_thread    = THREAD_NULL;

	/* Short-circuit the lookup if we're looking up ourselves */
	if (tid == self->thread_id || tid == TID_NULL) {
		/* TID_NULL is treated as "the calling thread" */
		assert(get_threadtask(self) == task);

		thread_reference(self);

		return self;
	}

	task_lock(task);

	/* Linear scan of the task's thread list under the task lock. */
	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
		if (iter_thread->thread_id == tid) {
			found_thread = iter_thread;
			/* reference taken while the lock guarantees the thread stays on the list */
			thread_reference(found_thread);
			break;
		}
	}

	task_unlock(task);

	return found_thread;
}
7899 
7900 int
pid_from_task(task_t task)7901 pid_from_task(task_t task)
7902 {
7903 	int pid = -1;
7904 	void *bsd_info = get_bsdtask_info(task);
7905 
7906 	if (bsd_info) {
7907 		pid = proc_pid(bsd_info);
7908 	} else {
7909 		pid = task_pid(task);
7910 	}
7911 
7912 	return pid;
7913 }
7914 
7915 /*
7916  * Control the CPU usage monitor for a task.
7917  */
7918 kern_return_t
task_cpu_usage_monitor_ctl(task_t task,uint32_t * flags)7919 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7920 {
7921 	int error = KERN_SUCCESS;
7922 
7923 	if (*flags & CPUMON_MAKE_FATAL) {
7924 		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7925 	} else {
7926 		error = KERN_INVALID_ARGUMENT;
7927 	}
7928 
7929 	return error;
7930 }
7931 
/*
 * Control the wakeups monitor for a task.
 *
 * Flags (in/out):
 *   WAKEMON_GET_PARAMS  - query only; returns rate in *rate_hz and state in
 *                         *flags (all other flags ignored).
 *   WAKEMON_ENABLE      - enable the monitor at *rate_hz wakes/sec
 *                         (WAKEMON_SET_DEFAULTS substitutes the system rate;
 *                         WAKEMON_MAKE_FATAL marks violations fatal).
 *   WAKEMON_DISABLE     - disable the monitor and its telemetry.
 */
kern_return_t
task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & WAKEMON_GET_PARAMS) {
		ledger_amount_t limit;
		uint64_t                period;

		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);

		if (limit != LEDGER_LIMIT_INFINITY) {
			/*
			 * An active limit means the wakeups monitor is enabled.
			 */
			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
			*flags = WAKEMON_ENABLE;
			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
				*flags |= WAKEMON_MAKE_FATAL;
			}
		} else {
			*flags = WAKEMON_DISABLE;
			*rate_hz = -1;
		}

		/*
		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	if (*flags & WAKEMON_ENABLE) {
		if (*flags & WAKEMON_SET_DEFAULTS) {
			*rate_hz = task_wakeups_monitor_rate;
		}

		/* The fatal flag is recorded even before the rate is validated. */
#ifndef CONFIG_NOMONITORS
		if (*flags & WAKEMON_MAKE_FATAL) {
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
		}
#endif /* CONFIG_NOMONITORS */

		if (*rate_hz <= 0) {
			task_unlock(task);
			return KERN_INVALID_ARGUMENT;
		}

		/* Program the ledger: limit = rate * interval, refilled every interval. */
#ifndef CONFIG_NOMONITORS
		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
		    (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
#endif /* CONFIG_NOMONITORS */
	} else if (*flags & WAKEMON_DISABLE) {
		/*
		 * Caller wishes to disable wakeups monitor on the task.
		 *
		 * Disable telemetry if it was triggered by the wakeups monitor, and
		 * remove the limit & callback on the wakeups ledger entry.
		 */
#if CONFIG_TELEMETRY
		telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
#endif
		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
8008 
/*
 * Ledger callback for the interrupt-wakeups entry.
 * warning == LEDGER_WARNING_ROSE_ABOVE: the task is approaching its limit.
 * warning == 0: the limit itself was exceeded — send the notification.
 */
void
task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
		 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
		 */
		telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
	}
}
8035 
8036 TUNABLE(bool, enable_wakeup_reports, "enable_wakeup_reports", false); /* Enable wakeup reports. */
8037 
8038 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)8039 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
8040 {
8041 	task_t                      task        = current_task();
8042 	int                         pid         = 0;
8043 	const char                  *procname   = "unknown";
8044 	boolean_t                   fatal;
8045 	kern_return_t               kr;
8046 #ifdef EXC_RESOURCE_MONITORS
8047 	mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
8048 #endif /* EXC_RESOURCE_MONITORS */
8049 	struct ledger_entry_info    lei;
8050 
8051 #ifdef MACH_BSD
8052 	pid = proc_selfpid();
8053 	if (get_bsdtask_info(task) != NULL) {
8054 		procname = proc_name_address(get_bsdtask_info(current_task()));
8055 	}
8056 #endif
8057 
8058 	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
8059 
8060 	/*
8061 	 * Disable the exception notification so we don't overwhelm
8062 	 * the listener with an endless stream of redundant exceptions.
8063 	 * TODO: detect whether another thread is already reporting the violation.
8064 	 */
8065 	uint32_t flags = WAKEMON_DISABLE;
8066 	task_wakeups_monitor_ctl(task, &flags, NULL);
8067 
8068 	fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
8069 	trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
8070 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
8071 	    "over ~%llu seconds, averaging %llu wakes / second and "
8072 	    "violating a %slimit of %llu wakes over %llu seconds.\n",
8073 	    procname, pid,
8074 	    lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
8075 	    lei.lei_last_refill == 0 ? 0 :
8076 	    (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
8077 	    fatal ? "FATAL " : "",
8078 	    lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
8079 
8080 	if (enable_wakeup_reports) {
8081 		kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
8082 		    fatal ? kRNFatalLimitFlag : 0);
8083 		if (kr) {
8084 			printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
8085 		}
8086 	}
8087 
8088 #ifdef EXC_RESOURCE_MONITORS
8089 	if (disable_exc_resource) {
8090 		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8091 		    "suppressed by a boot-arg\n", procname, pid);
8092 		return;
8093 	}
8094 	if (disable_exc_resource_during_audio && audio_active) {
8095 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8096 		    "suppressed due to audio playback\n", procname, pid);
8097 		return;
8098 	}
8099 	if (lei.lei_last_refill == 0) {
8100 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8101 		    "suppressed due to lei.lei_last_refill = 0 \n", procname, pid);
8102 	}
8103 
8104 	code[0] = code[1] = 0;
8105 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
8106 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
8107 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
8108 	    NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
8109 	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
8110 	    lei.lei_last_refill);
8111 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
8112 	    NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
8113 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8114 #endif /* EXC_RESOURCE_MONITORS */
8115 
8116 	if (fatal) {
8117 		task_terminate_internal(task);
8118 	}
8119 }
8120 
/*
 * Atomically add io_delta to the global logical-write counter via a CAS
 * loop. When the accumulated count reaches io_telemetry_limit the counter
 * is reset to zero and TRUE is returned to tell the caller to emit
 * telemetry; otherwise returns FALSE.
 *
 * NOTE(review): the initial read of *global_write_count is a plain load;
 * the CAS retry loop is what guarantees the update itself is atomic.
 */
static boolean_t
global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
{
	int64_t old_count, new_count;
	boolean_t needs_telemetry;

	do {
		new_count = old_count = *global_write_count;
		new_count += io_delta;
		if (new_count >= io_telemetry_limit) {
			new_count = 0;
			needs_telemetry = TRUE;
		} else {
			needs_telemetry = FALSE;
		}
	} while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
	return needs_telemetry;
}
8139 
/*
 * Account physical (filesystem-metadata) writes against a task's counters
 * and ledger. flags selects credit vs debit; a NULL task charges the
 * kernel task. No-op unless CONFIG_PHYS_WRITE_ACCT is built in.
 */
void
task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
{
#if CONFIG_PHYS_WRITE_ACCT
	if (!io_size) {
		return;
	}

	/*
	 * task == NULL means that we have to update kernel_task ledgers
	 */
	if (!task) {
		task = kernel_task;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
	    task_pid(task), flavor, io_size, flags, 0);
	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);

	/* Only the metadata flavor is currently accounted. */
	if (flags & TASK_BALANCE_CREDIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	} else if (flags & TASK_BALANCE_DEBIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	}
#endif /* CONFIG_PHYS_WRITE_ACCT */
}
8172 
8173 void
task_update_logical_writes(task_t task,uint32_t io_size,int flags,void * vp)8174 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
8175 {
8176 	int64_t io_delta = 0;
8177 	int64_t * global_counter_to_update;
8178 	boolean_t needs_telemetry = FALSE;
8179 	boolean_t is_external_device = FALSE;
8180 	int ledger_to_update = 0;
8181 	struct task_writes_counters * writes_counters_to_update;
8182 
8183 	if ((!task) || (!io_size) || (!vp)) {
8184 		return;
8185 	}
8186 
8187 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
8188 	    task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
8189 	DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
8190 
8191 	// Is the drive backing this vnode internal or external to the system?
8192 	if (vnode_isonexternalstorage(vp) == false) {
8193 		global_counter_to_update = &global_logical_writes_count;
8194 		ledger_to_update = task_ledgers.logical_writes;
8195 		writes_counters_to_update = &task->task_writes_counters_internal;
8196 		is_external_device = FALSE;
8197 	} else {
8198 		global_counter_to_update = &global_logical_writes_to_external_count;
8199 		ledger_to_update = task_ledgers.logical_writes_to_external;
8200 		writes_counters_to_update = &task->task_writes_counters_external;
8201 		is_external_device = TRUE;
8202 	}
8203 
8204 	switch (flags) {
8205 	case TASK_WRITE_IMMEDIATE:
8206 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
8207 		ledger_credit(task->ledger, ledger_to_update, io_size);
8208 		if (!is_external_device) {
8209 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8210 		}
8211 		break;
8212 	case TASK_WRITE_DEFERRED:
8213 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
8214 		ledger_credit(task->ledger, ledger_to_update, io_size);
8215 		if (!is_external_device) {
8216 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8217 		}
8218 		break;
8219 	case TASK_WRITE_INVALIDATED:
8220 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
8221 		ledger_debit(task->ledger, ledger_to_update, io_size);
8222 		if (!is_external_device) {
8223 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
8224 		}
8225 		break;
8226 	case TASK_WRITE_METADATA:
8227 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
8228 		ledger_credit(task->ledger, ledger_to_update, io_size);
8229 		if (!is_external_device) {
8230 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8231 		}
8232 		break;
8233 	}
8234 
8235 	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
8236 	if (io_telemetry_limit != 0) {
8237 		/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
8238 		needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
8239 		if (needs_telemetry && !is_external_device) {
8240 			act_set_io_telemetry_ast(current_thread());
8241 		}
8242 	}
8243 }
8244 
8245 /*
8246  * Control the I/O monitor for a task.
8247  */
8248 kern_return_t
task_io_monitor_ctl(task_t task,uint32_t * flags)8249 task_io_monitor_ctl(task_t task, uint32_t *flags)
8250 {
8251 	ledger_t ledger = task->ledger;
8252 
8253 	task_lock(task);
8254 	if (*flags & IOMON_ENABLE) {
8255 		/* Configure the physical I/O ledger */
8256 		ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
8257 		ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
8258 	} else if (*flags & IOMON_DISABLE) {
8259 		/*
8260 		 * Caller wishes to disable I/O monitor on the task.
8261 		 */
8262 		ledger_disable_refill(ledger, task_ledgers.physical_writes);
8263 		ledger_disable_callback(ledger, task_ledgers.physical_writes);
8264 	}
8265 
8266 	task_unlock(task);
8267 	return KERN_SUCCESS;
8268 }
8269 
8270 void
task_io_rate_exceeded(int warning,const void * param0,__unused const void * param1)8271 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
8272 {
8273 	if (warning == 0) {
8274 		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
8275 	}
8276 }
8277 
8278 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)8279 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
8280 {
8281 	int                             pid = 0;
8282 	task_t                          task = current_task();
8283 #ifdef EXC_RESOURCE_MONITORS
8284 	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
8285 #endif /* EXC_RESOURCE_MONITORS */
8286 	struct ledger_entry_info        lei = {};
8287 	kern_return_t                   kr;
8288 
8289 #ifdef MACH_BSD
8290 	pid = proc_selfpid();
8291 #endif
8292 	/*
8293 	 * Get the ledger entry info. We need to do this before disabling the exception
8294 	 * to get correct values for all fields.
8295 	 */
8296 	switch (flavor) {
8297 	case FLAVOR_IO_PHYSICAL_WRITES:
8298 		ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
8299 		break;
8300 	}
8301 
8302 
8303 	/*
8304 	 * Disable the exception notification so we don't overwhelm
8305 	 * the listener with an endless stream of redundant exceptions.
8306 	 * TODO: detect whether another thread is already reporting the violation.
8307 	 */
8308 	uint32_t flags = IOMON_DISABLE;
8309 	task_io_monitor_ctl(task, &flags);
8310 
8311 	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
8312 		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
8313 	}
8314 	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
8315 	    pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
8316 
8317 	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
8318 	if (kr) {
8319 		printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
8320 	}
8321 
8322 #ifdef EXC_RESOURCE_MONITORS
8323 	code[0] = code[1] = 0;
8324 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
8325 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
8326 	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
8327 	EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
8328 	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
8329 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8330 #endif /* EXC_RESOURCE_MONITORS */
8331 }
8332 
8333 void
task_port_space_ast(__unused task_t task)8334 task_port_space_ast(__unused task_t task)
8335 {
8336 	uint32_t current_size, soft_limit, hard_limit;
8337 	assert(task == current_task());
8338 	bool should_notify = ipc_space_check_table_size_limit(task->itk_space,
8339 	    &current_size, &soft_limit, &hard_limit);
8340 	if (should_notify) {
8341 		SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
8342 	}
8343 }
8344 
8345 #if CONFIG_PROC_RESOURCE_LIMITS
8346 static mach_port_t
task_allocate_fatal_port(void)8347 task_allocate_fatal_port(void)
8348 {
8349 	mach_port_t task_fatal_port = MACH_PORT_NULL;
8350 	task_id_token_t token;
8351 
8352 	kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
8353 	if (kr) {
8354 		return MACH_PORT_NULL;
8355 	}
8356 	task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
8357 	    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
8358 
8359 	task_id_token_set_port(token, task_fatal_port);
8360 
8361 	return task_fatal_port;
8362 }
8363 
8364 static void
task_fatal_port_no_senders(ipc_port_t port,__unused mach_port_mscount_t mscount)8365 task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
8366 {
8367 	task_t task = TASK_NULL;
8368 	kern_return_t kr;
8369 
8370 	task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);
8371 
8372 	assert(token != NULL);
8373 	if (token) {
8374 		kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
8375 		if (task) {
8376 			task_bsdtask_kill(task);
8377 			task_deallocate(task);
8378 		}
8379 		task_id_token_release(token); /* consumes ref given by notification */
8380 	}
8381 }
8382 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8383 
8384 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task,uint32_t current_size,uint32_t soft_limit,uint32_t hard_limit)8385 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
8386 {
8387 	int pid = 0;
8388 	char *procname = (char *) "unknown";
8389 	__unused kern_return_t kr;
8390 	__unused resource_notify_flags_t flags = kRNFlagsNone;
8391 	__unused uint32_t limit;
8392 	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
8393 	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
8394 
8395 	pid = proc_selfpid();
8396 	if (get_bsdtask_info(task) != NULL) {
8397 		procname = proc_name_address(get_bsdtask_info(task));
8398 	}
8399 
8400 	/*
8401 	 * Only kernel_task and launchd may be allowed to
8402 	 * have really large ipc space.
8403 	 */
8404 	if (pid == 0 || pid == 1) {
8405 		return;
8406 	}
8407 
8408 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
8409 	    Num of ports allocated %u; \n", procname, pid, current_size);
8410 
8411 	/* Abort the process if it has hit the system-wide limit for ipc port table size */
8412 	if (!hard_limit && !soft_limit) {
8413 		code[0] = code[1] = 0;
8414 		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
8415 		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
8416 		EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);
8417 
8418 		exit_with_port_space_exception(current_proc(), code[0], code[1]);
8419 
8420 		return;
8421 	}
8422 
8423 #if CONFIG_PROC_RESOURCE_LIMITS
8424 	if (hard_limit > 0) {
8425 		flags |= kRNHardLimitFlag;
8426 		limit = hard_limit;
8427 		task_fatal_port = task_allocate_fatal_port();
8428 		if (!task_fatal_port) {
8429 			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8430 			task_bsdtask_kill(task);
8431 		}
8432 	} else {
8433 		flags |= kRNSoftLimitFlag;
8434 		limit = soft_limit;
8435 	}
8436 
8437 	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8438 	if (kr) {
8439 		os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
8440 	}
8441 	if (task_fatal_port) {
8442 		ipc_port_release_send(task_fatal_port);
8443 	}
8444 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8445 }
8446 
8447 #if CONFIG_PROC_RESOURCE_LIMITS
8448 void
task_kqworkloop_ast(task_t task,int current_size,int soft_limit,int hard_limit)8449 task_kqworkloop_ast(task_t task, int current_size, int soft_limit, int hard_limit)
8450 {
8451 	assert(task == current_task());
8452 	return SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task, current_size, soft_limit, hard_limit);
8453 }
8454 
8455 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task,int current_size,int soft_limit,int hard_limit)8456 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit)
8457 {
8458 	int pid = 0;
8459 	char *procname = (char *) "unknown";
8460 #ifdef MACH_BSD
8461 	pid = proc_selfpid();
8462 	if (get_bsdtask_info(task) != NULL) {
8463 		procname = proc_name_address(get_bsdtask_info(task));
8464 	}
8465 #endif
8466 	if (pid == 0 || pid == 1) {
8467 		return;
8468 	}
8469 
8470 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many kqworkloops. \
8471 	    Num of kqworkloops allocated %u; \n", procname, pid, current_size);
8472 
8473 	int limit = 0;
8474 	resource_notify_flags_t flags = kRNFlagsNone;
8475 	mach_port_t task_fatal_port = MACH_PORT_NULL;
8476 	if (hard_limit) {
8477 		flags |= kRNHardLimitFlag;
8478 		limit = hard_limit;
8479 
8480 		task_fatal_port = task_allocate_fatal_port();
8481 		if (task_fatal_port == MACH_PORT_NULL) {
8482 			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8483 			task_bsdtask_kill(task);
8484 		}
8485 	} else {
8486 		flags |= kRNSoftLimitFlag;
8487 		limit = soft_limit;
8488 	}
8489 
8490 	kern_return_t kr;
8491 	kr = send_resource_violation_with_fatal_port(send_kqworkloops_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8492 	if (kr) {
8493 		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(kqworkloops, ...): error %#x\n", kr);
8494 	}
8495 	if (task_fatal_port) {
8496 		ipc_port_release_send(task_fatal_port);
8497 	}
8498 }
8499 
8500 
8501 void
task_filedesc_ast(__unused task_t task,__unused int current_size,__unused int soft_limit,__unused int hard_limit)8502 task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
8503 {
8504 	assert(task == current_task());
8505 	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
8506 }
8507 
8508 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task,int current_size,int soft_limit,int hard_limit)8509 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
8510 {
8511 	int pid = 0;
8512 	char *procname = (char *) "unknown";
8513 	kern_return_t kr;
8514 	resource_notify_flags_t flags = kRNFlagsNone;
8515 	int limit;
8516 	mach_port_t task_fatal_port = MACH_PORT_NULL;
8517 
8518 #ifdef MACH_BSD
8519 	pid = proc_selfpid();
8520 	if (get_bsdtask_info(task) != NULL) {
8521 		procname = proc_name_address(get_bsdtask_info(task));
8522 	}
8523 #endif
8524 	/*
8525 	 * Only kernel_task and launchd may be allowed to
8526 	 * have really large ipc space.
8527 	 */
8528 	if (pid == 0 || pid == 1) {
8529 		return;
8530 	}
8531 
8532 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
8533 	    Num of fds allocated %u; \n", procname, pid, current_size);
8534 
8535 	if (hard_limit > 0) {
8536 		flags |= kRNHardLimitFlag;
8537 		limit = hard_limit;
8538 		task_fatal_port = task_allocate_fatal_port();
8539 		if (!task_fatal_port) {
8540 			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8541 			task_bsdtask_kill(task);
8542 		}
8543 	} else {
8544 		flags |= kRNSoftLimitFlag;
8545 		limit = soft_limit;
8546 	}
8547 
8548 	kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8549 	if (kr) {
8550 		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
8551 	}
8552 	if (task_fatal_port) {
8553 		ipc_port_release_send(task_fatal_port);
8554 	}
8555 }
8556 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8557 
8558 /* Placeholders for the task set/get voucher interfaces */
8559 kern_return_t
task_get_mach_voucher(task_t task,mach_voucher_selector_t __unused which,ipc_voucher_t * voucher)8560 task_get_mach_voucher(
8561 	task_t                  task,
8562 	mach_voucher_selector_t __unused which,
8563 	ipc_voucher_t           *voucher)
8564 {
8565 	if (TASK_NULL == task) {
8566 		return KERN_INVALID_TASK;
8567 	}
8568 
8569 	*voucher = NULL;
8570 	return KERN_SUCCESS;
8571 }
8572 
8573 kern_return_t
task_set_mach_voucher(task_t task,ipc_voucher_t __unused voucher)8574 task_set_mach_voucher(
8575 	task_t                  task,
8576 	ipc_voucher_t           __unused voucher)
8577 {
8578 	if (TASK_NULL == task) {
8579 		return KERN_INVALID_TASK;
8580 	}
8581 
8582 	return KERN_SUCCESS;
8583 }
8584 
8585 kern_return_t
task_swap_mach_voucher(__unused task_t task,__unused ipc_voucher_t new_voucher,ipc_voucher_t * in_out_old_voucher)8586 task_swap_mach_voucher(
8587 	__unused task_t         task,
8588 	__unused ipc_voucher_t  new_voucher,
8589 	ipc_voucher_t          *in_out_old_voucher)
8590 {
8591 	/*
8592 	 * Currently this function is only called from a MIG generated
8593 	 * routine which doesn't release the reference on the voucher
8594 	 * addressed by in_out_old_voucher. To avoid leaking this reference,
8595 	 * a call to release it has been added here.
8596 	 */
8597 	ipc_voucher_release(*in_out_old_voucher);
8598 	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
8599 }
8600 
8601 void
task_set_gpu_denied(task_t task,boolean_t denied)8602 task_set_gpu_denied(task_t task, boolean_t denied)
8603 {
8604 	task_lock(task);
8605 
8606 	if (denied) {
8607 		task->t_flags |= TF_GPU_DENIED;
8608 	} else {
8609 		task->t_flags &= ~TF_GPU_DENIED;
8610 	}
8611 
8612 	task_unlock(task);
8613 }
8614 
8615 boolean_t
task_is_gpu_denied(task_t task)8616 task_is_gpu_denied(task_t task)
8617 {
8618 	/* We don't need the lock to read this flag */
8619 	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
8620 }
8621 
8622 /*
8623  * Task policy termination uses this path to clear the bit the final time
8624  * during the termination flow, and the TASK_POLICY_TERMINATED bit guarantees
8625  * that it won't be changed again on a terminated task.
8626  */
8627 bool
task_set_game_mode_locked(task_t task,bool enabled)8628 task_set_game_mode_locked(task_t task, bool enabled)
8629 {
8630 	task_lock_assert_owned(task);
8631 
8632 	if (enabled) {
8633 		assert(proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0);
8634 	}
8635 
8636 	bool previously_enabled = task_get_game_mode(task);
8637 	bool needs_update = false;
8638 	uint32_t new_count = 0;
8639 
8640 	if (enabled) {
8641 		task->t_flags |= TF_GAME_MODE;
8642 	} else {
8643 		task->t_flags &= ~TF_GAME_MODE;
8644 	}
8645 
8646 	if (enabled && !previously_enabled) {
8647 		if (task_coalition_adjust_game_mode_count(task, 1, &new_count) && (new_count == 1)) {
8648 			needs_update = true;
8649 		}
8650 	} else if (!enabled && previously_enabled) {
8651 		if (task_coalition_adjust_game_mode_count(task, -1, &new_count) && (new_count == 0)) {
8652 			needs_update = true;
8653 		}
8654 	}
8655 
8656 	return needs_update;
8657 }
8658 
8659 void
task_set_game_mode(task_t task,bool enabled)8660 task_set_game_mode(task_t task, bool enabled)
8661 {
8662 	bool needs_update = false;
8663 
8664 	task_lock(task);
8665 
8666 	/* After termination, further updates are no longer effective */
8667 	if (proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0) {
8668 		needs_update = task_set_game_mode_locked(task, enabled);
8669 	}
8670 
8671 	task_unlock(task);
8672 
8673 #if CONFIG_THREAD_GROUPS
8674 	if (needs_update) {
8675 		task_coalition_thread_group_game_mode_update(task);
8676 	}
8677 #endif /* CONFIG_THREAD_GROUPS */
8678 }
8679 
8680 bool
task_get_game_mode(task_t task)8681 task_get_game_mode(task_t task)
8682 {
8683 	/* We don't need the lock to read this flag */
8684 	return task->t_flags & TF_GAME_MODE;
8685 }
8686 
8687 
8688 uint64_t
get_task_memory_region_count(task_t task)8689 get_task_memory_region_count(task_t task)
8690 {
8691 	vm_map_t map;
8692 	map = (task == kernel_task) ? kernel_map: task->map;
8693 	return (uint64_t)get_map_nentries(map);
8694 }
8695 
8696 static void
kdebug_trace_dyld_internal(uint32_t base_code,struct dyld_kernel_image_info * info)8697 kdebug_trace_dyld_internal(uint32_t base_code,
8698     struct dyld_kernel_image_info *info)
8699 {
8700 	static_assert(sizeof(info->uuid) >= 16);
8701 
8702 #if defined(__LP64__)
8703 	uint64_t *uuid = (uint64_t *)&(info->uuid);
8704 
8705 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8706 	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
8707 	    uuid[1], info->load_addr,
8708 	    (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
8709 	    0);
8710 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8711 	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
8712 	    (uint64_t)info->fsobjid.fid_objno |
8713 	    ((uint64_t)info->fsobjid.fid_generation << 32),
8714 	    0, 0, 0, 0);
8715 #else /* defined(__LP64__) */
8716 	uint32_t *uuid = (uint32_t *)&(info->uuid);
8717 
8718 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8719 	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
8720 	    uuid[1], uuid[2], uuid[3], 0);
8721 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8722 	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
8723 	    (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
8724 	    info->fsobjid.fid_objno, 0);
8725 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8726 	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
8727 	    info->fsobjid.fid_generation, 0, 0, 0, 0);
8728 #endif /* !defined(__LP64__) */
8729 }
8730 
8731 static kern_return_t
kdebug_trace_dyld(task_t task,uint32_t base_code,vm_map_copy_t infos_copy,mach_msg_type_number_t infos_len)8732 kdebug_trace_dyld(task_t task, uint32_t base_code,
8733     vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
8734 {
8735 	kern_return_t kr;
8736 	dyld_kernel_image_info_array_t infos;
8737 	vm_map_offset_t map_data;
8738 	vm_offset_t data;
8739 
8740 	if (!infos_copy) {
8741 		return KERN_INVALID_ADDRESS;
8742 	}
8743 
8744 	if (!kdebug_enable ||
8745 	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
8746 		vm_map_copy_discard(infos_copy);
8747 		return KERN_SUCCESS;
8748 	}
8749 
8750 	if (task == NULL || task != current_task()) {
8751 		return KERN_INVALID_TASK;
8752 	}
8753 
8754 	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
8755 	if (kr != KERN_SUCCESS) {
8756 		return kr;
8757 	}
8758 
8759 	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
8760 
8761 	for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
8762 		kdebug_trace_dyld_internal(base_code, &(infos[i]));
8763 	}
8764 
8765 	data = CAST_DOWN(vm_offset_t, map_data);
8766 	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
8767 	return KERN_SUCCESS;
8768 }
8769 
8770 kern_return_t
task_register_dyld_image_infos(task_t task,dyld_kernel_image_info_array_t infos_copy,mach_msg_type_number_t infos_len)8771 task_register_dyld_image_infos(task_t task,
8772     dyld_kernel_image_info_array_t infos_copy,
8773     mach_msg_type_number_t infos_len)
8774 {
8775 	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
8776 	           (vm_map_copy_t)infos_copy, infos_len);
8777 }
8778 
8779 kern_return_t
task_unregister_dyld_image_infos(task_t task,dyld_kernel_image_info_array_t infos_copy,mach_msg_type_number_t infos_len)8780 task_unregister_dyld_image_infos(task_t task,
8781     dyld_kernel_image_info_array_t infos_copy,
8782     mach_msg_type_number_t infos_len)
8783 {
8784 	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
8785 	           (vm_map_copy_t)infos_copy, infos_len);
8786 }
8787 
8788 kern_return_t
task_get_dyld_image_infos(__unused task_t task,__unused dyld_kernel_image_info_array_t * dyld_images,__unused mach_msg_type_number_t * dyld_imagesCnt)8789 task_get_dyld_image_infos(__unused task_t task,
8790     __unused dyld_kernel_image_info_array_t * dyld_images,
8791     __unused mach_msg_type_number_t * dyld_imagesCnt)
8792 {
8793 	return KERN_NOT_SUPPORTED;
8794 }
8795 
8796 kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,dyld_kernel_image_info_t cache_img,__unused boolean_t no_cache,__unused boolean_t private_cache)8797 task_register_dyld_shared_cache_image_info(task_t task,
8798     dyld_kernel_image_info_t cache_img,
8799     __unused boolean_t no_cache,
8800     __unused boolean_t private_cache)
8801 {
8802 	if (task == NULL || task != current_task()) {
8803 		return KERN_INVALID_TASK;
8804 	}
8805 
8806 	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
8807 	return KERN_SUCCESS;
8808 }
8809 
8810 kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,__unused uint8_t dyld_state)8811 task_register_dyld_set_dyld_state(__unused task_t task,
8812     __unused uint8_t dyld_state)
8813 {
8814 	return KERN_NOT_SUPPORTED;
8815 }
8816 
8817 kern_return_t
task_register_dyld_get_process_state(__unused task_t task,__unused dyld_kernel_process_info_t * dyld_process_state)8818 task_register_dyld_get_process_state(__unused task_t task,
8819     __unused dyld_kernel_process_info_t * dyld_process_state)
8820 {
8821 	return KERN_NOT_SUPPORTED;
8822 }
8823 
8824 kern_return_t
task_inspect(task_inspect_t task_insp,task_inspect_flavor_t flavor,task_inspect_info_t info_out,mach_msg_type_number_t * size_in_out)8825 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
8826     task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
8827 {
8828 #if CONFIG_PERVASIVE_CPI
8829 	task_t task = (task_t)task_insp;
8830 	kern_return_t kr = KERN_SUCCESS;
8831 	mach_msg_type_number_t size;
8832 
8833 	if (task == TASK_NULL) {
8834 		return KERN_INVALID_ARGUMENT;
8835 	}
8836 
8837 	size = *size_in_out;
8838 
8839 	switch (flavor) {
8840 	case TASK_INSPECT_BASIC_COUNTS: {
8841 		struct task_inspect_basic_counts *bc =
8842 		    (struct task_inspect_basic_counts *)info_out;
8843 		struct recount_usage stats = { 0 };
8844 		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
8845 			kr = KERN_INVALID_ARGUMENT;
8846 			break;
8847 		}
8848 
8849 		recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
8850 		bc->instructions = recount_usage_instructions(&stats);
8851 		bc->cycles = recount_usage_cycles(&stats);
8852 		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
8853 		break;
8854 	}
8855 	default:
8856 		kr = KERN_INVALID_ARGUMENT;
8857 		break;
8858 	}
8859 
8860 	if (kr == KERN_SUCCESS) {
8861 		*size_in_out = size;
8862 	}
8863 	return kr;
8864 #else /* CONFIG_PERVASIVE_CPI */
8865 #pragma unused(task_insp, flavor, info_out, size_in_out)
8866 	return KERN_NOT_SUPPORTED;
8867 #endif /* !CONFIG_PERVASIVE_CPI */
8868 }
8869 
8870 #if CONFIG_SECLUDED_MEMORY
8871 int num_tasks_can_use_secluded_mem = 0;
8872 
8873 void
task_set_can_use_secluded_mem(task_t task,boolean_t can_use_secluded_mem)8874 task_set_can_use_secluded_mem(
8875 	task_t          task,
8876 	boolean_t       can_use_secluded_mem)
8877 {
8878 	if (!task->task_could_use_secluded_mem) {
8879 		return;
8880 	}
8881 	task_lock(task);
8882 	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
8883 	task_unlock(task);
8884 }
8885 
8886 void
task_set_can_use_secluded_mem_locked(task_t task,boolean_t can_use_secluded_mem)8887 task_set_can_use_secluded_mem_locked(
8888 	task_t          task,
8889 	boolean_t       can_use_secluded_mem)
8890 {
8891 	assert(task->task_could_use_secluded_mem);
8892 	if (can_use_secluded_mem &&
8893 	    secluded_for_apps &&         /* global boot-arg */
8894 	    !task->task_can_use_secluded_mem) {
8895 		assert(num_tasks_can_use_secluded_mem >= 0);
8896 		OSAddAtomic(+1,
8897 		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8898 		task->task_can_use_secluded_mem = TRUE;
8899 	} else if (!can_use_secluded_mem &&
8900 	    task->task_can_use_secluded_mem) {
8901 		assert(num_tasks_can_use_secluded_mem > 0);
8902 		OSAddAtomic(-1,
8903 		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8904 		task->task_can_use_secluded_mem = FALSE;
8905 	}
8906 }
8907 
8908 void
task_set_could_use_secluded_mem(task_t task,boolean_t could_use_secluded_mem)8909 task_set_could_use_secluded_mem(
8910 	task_t          task,
8911 	boolean_t       could_use_secluded_mem)
8912 {
8913 	task->task_could_use_secluded_mem = !!could_use_secluded_mem;
8914 }
8915 
8916 void
task_set_could_also_use_secluded_mem(task_t task,boolean_t could_also_use_secluded_mem)8917 task_set_could_also_use_secluded_mem(
8918 	task_t          task,
8919 	boolean_t       could_also_use_secluded_mem)
8920 {
8921 	task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
8922 }
8923 
8924 boolean_t
task_can_use_secluded_mem(task_t task,boolean_t is_alloc)8925 task_can_use_secluded_mem(
8926 	task_t          task,
8927 	boolean_t       is_alloc)
8928 {
8929 	if (task->task_can_use_secluded_mem) {
8930 		assert(task->task_could_use_secluded_mem);
8931 		assert(num_tasks_can_use_secluded_mem > 0);
8932 		return TRUE;
8933 	}
8934 	if (task->task_could_also_use_secluded_mem &&
8935 	    num_tasks_can_use_secluded_mem > 0) {
8936 		assert(num_tasks_can_use_secluded_mem > 0);
8937 		return TRUE;
8938 	}
8939 
8940 	/*
8941 	 * If a single task is using more than some large amount of
8942 	 * memory (i.e. secluded_shutoff_trigger) and is approaching
8943 	 * its task limit, allow it to dip into secluded and begin
8944 	 * suppression of rebuilding secluded memory until that task exits.
8945 	 */
8946 	if (is_alloc && secluded_shutoff_trigger != 0) {
8947 		uint64_t phys_used = get_task_phys_footprint(task);
8948 		uint64_t limit = get_task_phys_footprint_limit(task);
8949 		if (phys_used > secluded_shutoff_trigger &&
8950 		    limit > secluded_shutoff_trigger &&
8951 		    phys_used > limit - secluded_shutoff_headroom) {
8952 			start_secluded_suppression(task);
8953 			return TRUE;
8954 		}
8955 	}
8956 
8957 	return FALSE;
8958 }
8959 
8960 boolean_t
task_could_use_secluded_mem(task_t task)8961 task_could_use_secluded_mem(
8962 	task_t  task)
8963 {
8964 	return task->task_could_use_secluded_mem;
8965 }
8966 
8967 boolean_t
task_could_also_use_secluded_mem(task_t task)8968 task_could_also_use_secluded_mem(
8969 	task_t  task)
8970 {
8971 	return task->task_could_also_use_secluded_mem;
8972 }
8973 #endif /* CONFIG_SECLUDED_MEMORY */
8974 
8975 queue_head_t *
task_io_user_clients(task_t task)8976 task_io_user_clients(task_t task)
8977 {
8978 	return &task->io_user_clients;
8979 }
8980 
8981 void
task_set_message_app_suspended(task_t task,boolean_t enable)8982 task_set_message_app_suspended(task_t task, boolean_t enable)
8983 {
8984 	task->message_app_suspended = enable;
8985 }
8986 
8987 void
task_copy_fields_for_exec(task_t dst_task,task_t src_task)8988 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
8989 {
8990 	dst_task->vtimers = src_task->vtimers;
8991 }
8992 
#if DEVELOPMENT || DEBUG
/* Non-zero: system-wide override forcing footprint-style region reporting. */
int vm_region_footprint = 0;
#endif /* DEVELOPMENT || DEBUG */
8996 
8997 boolean_t
task_self_region_footprint(void)8998 task_self_region_footprint(void)
8999 {
9000 #if DEVELOPMENT || DEBUG
9001 	if (vm_region_footprint) {
9002 		/* system-wide override */
9003 		return TRUE;
9004 	}
9005 #endif /* DEVELOPMENT || DEBUG */
9006 	return current_task()->task_region_footprint;
9007 }
9008 
9009 void
task_self_region_footprint_set(boolean_t newval)9010 task_self_region_footprint_set(
9011 	boolean_t newval)
9012 {
9013 	task_t  curtask;
9014 
9015 	curtask = current_task();
9016 	task_lock(curtask);
9017 	if (newval) {
9018 		curtask->task_region_footprint = TRUE;
9019 	} else {
9020 		curtask->task_region_footprint = FALSE;
9021 	}
9022 	task_unlock(curtask);
9023 }
9024 
9025 void
task_set_darkwake_mode(task_t task,boolean_t set_mode)9026 task_set_darkwake_mode(task_t task, boolean_t set_mode)
9027 {
9028 	assert(task);
9029 
9030 	task_lock(task);
9031 
9032 	if (set_mode) {
9033 		task->t_flags |= TF_DARKWAKE_MODE;
9034 	} else {
9035 		task->t_flags &= ~(TF_DARKWAKE_MODE);
9036 	}
9037 
9038 	task_unlock(task);
9039 }
9040 
9041 boolean_t
task_get_darkwake_mode(task_t task)9042 task_get_darkwake_mode(task_t task)
9043 {
9044 	assert(task);
9045 	return (task->t_flags & TF_DARKWAKE_MODE) != 0;
9046 }
9047 
/*
 * Set default behavior for task's control port and EXC_GUARD variants that have
 * settable behavior.
 *
 * Platform binaries typically have one behavior, third parties another -
 * but there are special exceptions we may need to account for.
 */
void
task_set_exc_guard_ctrl_port_default(
	task_t task,
	thread_t main_thread,
	const char *name,
	unsigned int namelen,
	boolean_t is_simulated,
	uint32_t platform,
	uint32_t sdk)
{
	task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;

	if (task_is_hardened_binary(task)) {
		/* set exc guard default behavior for hardened binaries */
		task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);

		if (1 == task_pid(task)) {
			/* special flags for inittask - deliver every instance as corpse */
			task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
		} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
			/* honor by-name default setting overrides */

			int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);

			for (int i = 0; i < count; i++) {
				const struct task_exc_guard_named_default *named_default =
				    &task_exc_guard_named_defaults[i];
				/* exact name match: same prefix AND same length */
				if (strncmp(named_default->name, name, namelen) == 0 &&
				    strlen(named_default->name) == namelen) {
					task->task_exc_guard = named_default->behavior;
					break;
				}
			}
		}

		/* set control port options for 1p code, inherited from parent task by default */
		opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
	} else {
		/* set exc guard default behavior for third-party code */
		task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
		/* set control port options for 3p code, inherited from parent task by default */
		opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
	}

	if (is_simulated) {
		/* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
		if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
			task->task_exc_guard = TASK_EXC_GUARD_NONE;
		}
		/* Disable protection for control ports for simulated binaries */
		opts = TASK_CONTROL_PORT_OPTIONS_NONE;
	}


	task_set_control_port_options(task, opts);

	/* Pin/immovable policy follows from the control port options chosen above. */
	task_set_immovable_pinned(task);
	main_thread_set_immovable_pinned(main_thread);
}
9116 
9117 kern_return_t
task_get_exc_guard_behavior(task_t task,task_exc_guard_behavior_t * behaviorp)9118 task_get_exc_guard_behavior(
9119 	task_t task,
9120 	task_exc_guard_behavior_t *behaviorp)
9121 {
9122 	if (task == TASK_NULL) {
9123 		return KERN_INVALID_TASK;
9124 	}
9125 	*behaviorp = task->task_exc_guard;
9126 	return KERN_SUCCESS;
9127 }
9128 
/*
 * Update the task's EXC_GUARD behavior.
 *
 * The request is masked by task_exc_guard_config_mask.  On RELEASE kernels
 * the update is performed with an atomic RMW loop and is restricted to
 * "upgrades" only: bits in the no-unset mask cannot be cleared and bits in
 * the no-set mask cannot be set (KERN_DENIED otherwise).
 */
kern_return_t
task_set_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t new_behavior)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
		return KERN_INVALID_VALUE;
	}

	/* limit setting to that allowed for this config */
	new_behavior = new_behavior & task_exc_guard_config_mask;

#if !defined (DEBUG) && !defined (DEVELOPMENT)
	/* On release kernels, only allow _upgrading_ exc guard behavior */
	task_exc_guard_behavior_t cur_behavior;

	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
		/* deny clearing any currently-set bit protected by the no-unset mask */
		if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* deny setting any new bit protected by the no-set mask */
		if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* no restrictions on CORPSE bit */
	});
#else
	task->task_exc_guard = new_behavior;
#endif
	return KERN_SUCCESS;
}
9164 
/*
 * Enable/disable corpse forking for a task (DEVELOPMENT/DEBUG kernels only;
 * KERN_NOT_SUPPORTED elsewhere).  TASK_CORPSE_FORKING_DISABLED_MEM_DIAG in
 * `behavior` sets TF_NO_CORPSE_FORKING, otherwise the flag is cleared.
 */
kern_return_t
task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
{
#if DEVELOPMENT || DEBUG
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}

	task_lock(task);
	if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
		task->t_flags |= TF_NO_CORPSE_FORKING;
	} else {
		task->t_flags &= ~TF_NO_CORPSE_FORKING;
	}
	task_unlock(task);

	return KERN_SUCCESS;
#else
	(void)task;
	(void)behavior;
	return KERN_NOT_SUPPORTED;
#endif
}
9188 
/*
 * Is corpse forking disabled for this task?  Reads TF_NO_CORPSE_FORKING
 * under the task lock.  Note: returns the raw masked bit (non-zero means
 * disabled), not a normalized TRUE.
 */
boolean_t
task_corpse_forking_disabled(task_t task)
{
	boolean_t disabled = FALSE;

	task_lock(task);
	disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
	task_unlock(task);

	return disabled;
}
9200 
9201 #if __arm64__
9202 extern int legacy_footprint_entitlement_mode;
9203 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
9204 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
9205 
9206 
/*
 * Mark the task as using legacy (pre-entitlement) footprint accounting.
 * One-way switch: nothing in this file clears the flag again.
 */
void
task_set_legacy_footprint(
	task_t task)
{
	task_lock(task);
	task->task_legacy_footprint = TRUE;
	task_unlock(task);
}
9215 
/*
 * Grant the task its extra footprint limit exactly once and notify
 * memorystatus.  Uses a check / lock / re-check pattern so concurrent
 * callers only perform the memorystatus action a single time.
 */
void
task_set_extra_footprint_limit(
	task_t task)
{
	/* cheap unlocked fast path: already granted */
	if (task->task_extra_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check under the lock: another thread may have won the race */
	if (task->task_extra_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_extra_footprint_limit = TRUE;
	task_unlock(task);
	/* memorystatus callout is made outside the task lock */
	memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
}
9232 
/*
 * Grant the task the iOS 13 "extended footprint" limit exactly once and
 * notify memorystatus.  Same check / lock / re-check pattern as
 * task_set_extra_footprint_limit().
 */
void
task_set_ios13extended_footprint_limit(
	task_t task)
{
	/* cheap unlocked fast path: already granted */
	if (task->task_ios13extended_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check under the lock: another thread may have won the race */
	if (task->task_ios13extended_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_ios13extended_footprint_limit = TRUE;
	task_unlock(task);
	/* memorystatus callout is made outside the task lock */
	memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
}
9249 #endif /* __arm64__ */
9250 
9251 static inline ledger_amount_t
task_ledger_get_balance(ledger_t ledger,int ledger_idx)9252 task_ledger_get_balance(
9253 	ledger_t        ledger,
9254 	int             ledger_idx)
9255 {
9256 	ledger_amount_t amount;
9257 	amount = 0;
9258 	ledger_get_balance(ledger, ledger_idx, &amount);
9259 	return amount;
9260 }
9261 
/*
 * Gather the amount of memory counted in a task's footprint due to
 * being in a specific set of ledgers.
 *
 * Sums the resident and compressed balances of every footprint-relevant
 * ledger into *ledger_resident / *ledger_compressed (both reset to 0 first).
 */
void
task_ledgers_footprint(
	ledger_t        ledger,
	ledger_amount_t *ledger_resident,
	ledger_amount_t *ledger_compressed)
{
	*ledger_resident = 0;
	*ledger_compressed = 0;

	/* purgeable non-volatile memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);

	/* "default" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);

	/* "network" currently never counts in the footprint... */

	/* "media" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);

	/* "graphics" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);

	/* "neural" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
}
9297 
9298 #if CONFIG_MEMORYSTATUS
9299 /*
9300  * Credit any outstanding task dirty time to the ledger.
9301  * memstat_dirty_start is pushed forward to prevent any possibility of double
9302  * counting, making it safe to call this as often as necessary to ensure that
9303  * anyone reading the ledger gets up-to-date information.
9304  */
void
task_ledger_settle_dirty_time(task_t t)
{
	task_lock(t);

	/* memstat_dirty_start == 0 means the task is not currently dirty */
	uint64_t start = t->memstat_dirty_start;
	if (start) {
		uint64_t now = mach_absolute_time();

		uint64_t duration;
		absolutetime_to_nanoseconds(now - start, &duration);

		ledger_t ledger = get_task_ledger(t);
		ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);

		/* push the start marker forward so the same interval is never credited twice */
		t->memstat_dirty_start = now;
	}

	task_unlock(t);
}
9325 #endif /* CONFIG_MEMORYSTATUS */
9326 
/*
 * Allow or disallow the task to transfer memory ownership.
 * The value is normalized to 0/1 and written under the task lock.
 */
void
task_set_memory_ownership_transfer(
	task_t    task,
	boolean_t value)
{
	task_lock(task);
	task->task_can_transfer_memory_ownership = !!value;
	task_unlock(task);
}
9336 
9337 #if DEVELOPMENT || DEBUG
9338 
/*
 * Debug-only: set/clear the task's "no footprint" accounting flag.
 * Value is normalized to 0/1 under the task lock.
 */
void
task_set_no_footprint_for_debug(task_t task, boolean_t value)
{
	task_lock(task);
	task->task_no_footprint_for_debug = !!value;
	task_unlock(task);
}
9346 
9347 int
task_get_no_footprint_for_debug(task_t task)9348 task_get_no_footprint_for_debug(task_t task)
9349 {
9350 	return task->task_no_footprint_for_debug;
9351 }
9352 
9353 #endif /* DEVELOPMENT || DEBUG */
9354 
/*
 * Copy per-VM-object statistics for every object owned by the task into
 * `query` (up to `len` bytes).  With query == NULL, just report the owned
 * object count.  *num receives the number of entries written/counted.
 */
void
task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
{
	vm_object_t find_vmo;
	size_t size = 0;

	/*
	 * Allocate a save area for FP state before taking task_objq lock,
	 * if necessary, to ensure that VM_KERNEL_ADDRHASH() doesn't cause
	 * an FP state allocation while holding VM locks.
	 */
	ml_fp_save_area_prealloc();

	task_objq_lock(task);
	if (query != NULL) {
		queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
		{
			vm_object_query_t p = &query[size++];

			/* make sure to not overrun */
			if (size * sizeof(vm_object_query_data_t) > len) {
				--size;
				break;
			}

			bzero(p, sizeof(*p));
			/* hashed, not raw, kernel pointer: safe to expose */
			p->object_id = (vm_object_id_t) VM_KERNEL_ADDRHASH(find_vmo);
			p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
			p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
			p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
			p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
			p->vo_no_footprint = find_vmo->vo_no_footprint;
			p->vo_ledger_tag = find_vmo->vo_ledger_tag;
			p->purgable = find_vmo->purgable;

			/* compressed size only meaningful for internal objects with a pager */
			if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
				p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
			} else {
				p->compressed_size = 0;
			}
		}
	} else {
		/* count-only query */
		size = (size_t)task->task_owned_objects;
	}
	task_objq_unlock(task);

	*num = size;
}
9403 
/*
 * Two-phase query of a task's owned VM objects.
 * buffer_size == 0: size probe; *entries gets the object count and
 * *output_size the buffer size needed (header + entries), or 0 if none.
 * buffer_size != 0: fill `buffer` and report actual sizes/counts.
 * NOTE(review): assumes buffer_size >= sizeof(*buffer) in the fill path
 * (the subtraction would otherwise wrap) — confirm callers guarantee this.
 */
void
task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
{
	assert(output_size);
	assert(entries);

	/* copy the vmobjects and vmobject data out of the task */
	if (buffer_size == 0) {
		task_copy_vmobjects(task, NULL, 0, entries);
		*output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
	} else {
		assert(buffer);
		task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
		buffer->entries = (uint64_t)*entries;
		*output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
	}
}
9421 
/*
 * Snapshot from_task's owned-VM-object list into a freshly allocated buffer
 * and attach it to to_task (used to carry the list onto a corpse task).
 * Allocation failure or an empty list leaves to_task untouched.
 */
void
task_store_owned_vmobject_info(task_t to_task, task_t from_task)
{
	size_t buffer_size;
	vmobject_list_output_t buffer;
	size_t output_size;
	size_t entries;

	assert(to_task != from_task);

	/* get the size, allocate a buffer, and populate */
	entries = 0;
	output_size = 0;
	task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);

	if (output_size) {
		buffer_size = output_size;
		buffer = kalloc_data(buffer_size, Z_WAITOK);

		if (buffer) {
			entries = 0;
			output_size = 0;

			/* second pass: the list may have changed size since the probe */
			task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);

			if (entries) {
				/* ownership of `buffer` transfers to to_task */
				to_task->corpse_vmobject_list = buffer;
				to_task->corpse_vmobject_list_size = buffer_size;
			}
		}
	}
}
9454 
/*
 * Set or clear the task's message-filtering flag (TFRO_FILTER_MSG) in the
 * read-only task flags.
 */
void
task_set_filter_msg_flag(
	task_t task,
	boolean_t flag)
{
	assert(task != TASK_NULL);

	if (flag) {
		task_ro_flags_set(task, TFRO_FILTER_MSG);
	} else {
		task_ro_flags_clear(task, TFRO_FILTER_MSG);
	}
}
9468 
9469 boolean_t
task_get_filter_msg_flag(task_t task)9470 task_get_filter_msg_flag(
9471 	task_t task)
9472 {
9473 	if (!task) {
9474 		return false;
9475 	}
9476 
9477 	return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
9478 }
9479 bool
task_is_exotic(task_t task)9480 task_is_exotic(
9481 	task_t task)
9482 {
9483 	if (task == TASK_NULL) {
9484 		return false;
9485 	}
9486 	return vm_map_is_exotic(get_task_map(task));
9487 }
9488 
9489 bool
task_is_alien(task_t task)9490 task_is_alien(
9491 	task_t task)
9492 {
9493 	if (task == TASK_NULL) {
9494 		return false;
9495 	}
9496 	return vm_map_is_alien(get_task_map(task));
9497 }
9498 
9499 
9500 
9501 #if CONFIG_MACF
/* Set the filter mask for Mach traps (thin MAC wrapper). */
void
mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_trap_filter_mask(task, maskptr);
}
9510 
/* Set the filter mask for kobject msgs (thin MAC wrapper). */
void
mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_kobj_filter_mask(task, maskptr);
}
9519 
9520 /* Hook for mach trap/sc filter evaluation policy. */
9521 SECURITY_READ_ONLY_LATE(mac_task_mach_filter_cbfunc_t) mac_task_mach_trap_evaluate = NULL;
9522 
9523 /* Hook for kobj message filter evaluation policy. */
9524 SECURITY_READ_ONLY_LATE(mac_task_kobj_filter_cbfunc_t) mac_task_kobj_msg_evaluate = NULL;
9525 
9526 /* Set the callback hooks for the filtering policy. */
/*
 * Register the MAC filtering policy's callbacks.  Each hook may only be set
 * once (read-only-late storage); re-registration fails.
 * NOTE(review): declared int but returns KERN_* values — callers compare
 * against KERN_SUCCESS/KERN_FAILURE.
 */
int
mac_task_register_filter_callbacks(
	const mac_task_mach_filter_cbfunc_t mach_cbfunc,
	const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
{
	if (mach_cbfunc != NULL) {
		if (mac_task_mach_trap_evaluate != NULL) {
			/* already registered */
			return KERN_FAILURE;
		}
		mac_task_mach_trap_evaluate = mach_cbfunc;
	}
	if (kobj_cbfunc != NULL) {
		if (mac_task_kobj_msg_evaluate != NULL) {
			/* already registered */
			return KERN_FAILURE;
		}
		mac_task_kobj_msg_evaluate = kobj_cbfunc;
	}

	return KERN_SUCCESS;
}
9547 #endif /* CONFIG_MACF */
9548 
9549 #if CONFIG_ROSETTA
9550 bool
task_is_translated(task_t task)9551 task_is_translated(task_t task)
9552 {
9553 	extern boolean_t proc_is_translated(struct proc* p);
9554 	return task && proc_is_translated(get_bsdtask_info(task));
9555 }
9556 #endif
9557 
9558 
9559 
9560 #if __has_feature(ptrauth_calls)
9561 /* On FPAC, we want to deliver all PAC violations as fatal exceptions, regardless
9562  * of the enable_pac_exception boot-arg value or any other entitlements.
9563  * The only case where we allow non-fatal PAC exceptions on FPAC is for debugging,
9564  * which requires Developer Mode enabled.
9565  *
9566  * On non-FPAC hardware, we gate the decision behind entitlements and the
9567  * enable_pac_exception boot-arg.
9568  */
9569 extern int gARM_FEAT_FPAC;
9570 /*
9571  * Having the PAC_EXCEPTION_ENTITLEMENT entitlement means we always enforce all
9572  * of the PAC exception hardening: fatal exceptions and signed user state.
9573  */
9574 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
9575 /*
9576  * On non-FPAC hardware, when enable_pac_exception boot-arg is set to true,
9577  * processes can choose to get non-fatal PAC exception delivery by setting
9578  * the SKIP_PAC_EXCEPTION_ENTITLEMENT entitlement.
9579  */
9580 #define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
9581 
/*
 * Decide, at task start, whether PAC violations are fatal for this task and
 * whether user thread state must be signed; records the result in the
 * read-only task flags (TFRO_PAC_EXC_FATAL / TFRO_PAC_ENFORCE_USER_STATE).
 * See the policy comment above PAC_EXCEPTION_ENTITLEMENT for the rules.
 */
void
task_set_pac_exception_fatal_flag(
	task_t task)
{
	assert(task != TASK_NULL);
	bool pac_hardened_task = false;
	uint32_t set_flags = 0;

	/*
	 * We must not apply this security policy on tasks which have opted out of mach hardening to
	 * avoid regressions in third party plugins and third party apps when using AMFI boot-args
	 */
	bool platform_binary = task_get_platform_binary(task);
#if XNU_TARGET_OS_OSX
	platform_binary &= !task_opted_out_mach_hardening(task);
#endif /* XNU_TARGET_OS_OSX */

	/*
	 * On non-FPAC hardware, we allow gating PAC exceptions behind
	 * SKIP_PAC_EXCEPTION_ENTITLEMENT and the boot-arg.
	 */
	if (!gARM_FEAT_FPAC && enable_pac_exception &&
	    IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
		/* opted out: leave both flags unset */
		return;
	}

	if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT) || task_get_hardened_runtime(task)) {
		pac_hardened_task = true;
		set_flags |= TFRO_PAC_ENFORCE_USER_STATE;
	}

	/* On non-FPAC hardware, gate the fatal property behind entitlements and boot-arg. */
	if (pac_hardened_task ||
	    ((enable_pac_exception || gARM_FEAT_FPAC) && platform_binary)) {
		/* If debugging is configured, do not make PAC exception fatal. */
		if (address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
			set_flags |= TFRO_PAC_EXC_FATAL;
		}
	}

	if (set_flags != 0) {
		task_ro_flags_set(task, set_flags);
	}
}
9626 
9627 bool
task_is_pac_exception_fatal(task_t task)9628 task_is_pac_exception_fatal(
9629 	task_t task)
9630 {
9631 	assert(task != TASK_NULL);
9632 	return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
9633 }
9634 #endif /* __has_feature(ptrauth_calls) */
9635 
9636 /*
9637  * FATAL_EXCEPTION_ENTITLEMENT, if present, will contain a list of
9638  * conditions for which access violations should deliver SIGKILL rather than
9639  * SIGSEGV.  This is a hardening measure intended for use by applications
9640  * that are able to handle the stricter error handling behavior.  Currently
9641  * this supports FATAL_EXCEPTION_ENTITLEMENT_JIT, which is documented in
9642  * user_fault_in_self_restrict_mode().
9643  */
9644 #define FATAL_EXCEPTION_ENTITLEMENT "com.apple.security.fatal-exceptions"
9645 #define FATAL_EXCEPTION_ENTITLEMENT_JIT "jit"
9646 
9647 void
task_set_jit_exception_fatal_flag(task_t task)9648 task_set_jit_exception_fatal_flag(
9649 	task_t task)
9650 {
9651 	assert(task != TASK_NULL);
9652 	if (IOTaskHasStringEntitlement(task, FATAL_EXCEPTION_ENTITLEMENT, FATAL_EXCEPTION_ENTITLEMENT_JIT) &&
9653 	    address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
9654 		task_ro_flags_set(task, TFRO_JIT_EXC_FATAL);
9655 	}
9656 }
9657 
/*
 * Are JIT access violations fatal for this task?  Always true on non-macOS
 * platforms; on macOS it depends on TFRO_JIT_EXC_FATAL.
 */
bool
task_is_jit_exception_fatal(
	__unused task_t task)
{
#if !defined(XNU_PLATFORM_MacOSX)
	return true;
#else
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_JIT_EXC_FATAL);
#endif
}
9669 
9670 bool
task_needs_user_signed_thread_state(task_t task)9671 task_needs_user_signed_thread_state(
9672 	task_t task)
9673 {
9674 	assert(task != TASK_NULL);
9675 	return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
9676 }
9677 
/*
 * Enable TECS (CPU vulnerability mitigation) for the task (current task if
 * TASK_NULL) and apply it to every existing thread.  No-op when the machine
 * does not require the CPUVN_CI mitigation.
 */
void
task_set_tecs(task_t task)
{
	if (task == TASK_NULL) {
		task = current_task();
	}

	if (!machine_csv(CPUVN_CI)) {
		/* mitigation not applicable on this hardware */
		return;
	}

	/* about to take the task lock below — must not already hold it */
	LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);

	task_lock(task);

	task->t_flags |= TF_TECS;

	/* propagate to all threads that already exist */
	thread_t thread;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		machine_tecs(thread);
	}
	task_unlock(task);
}
9701 
/*
 * Test-only (DEVELOPMENT/DEBUG): perform a blocking synchronous kernel
 * upcall on `send_port`, then consume the send right.  Only the current
 * task may invoke this on itself.
 */
kern_return_t
task_test_sync_upcall(
	task_t     task,
	ipc_port_t send_port)
{
#if DEVELOPMENT || DEBUG
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Block on sync kernel upcall on the given send port */
	mach_test_sync_upcall(send_port);

	/* consume the caller's send right */
	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	return KERN_NOT_SUPPORTED;
#endif
}
9723 
/*
 * Test-only (DEVELOPMENT/DEBUG): attach QoS/IO-tier attributes to
 * `send_port`, perform an async kernel upcall with importance donation
 * enabled, then consume the send right.  Validates qos and iotier ranges.
 */
kern_return_t
task_test_async_upcall_propagation(
	task_t      task,
	ipc_port_t  send_port,
	int         qos,
	int         iotier)
{
#if DEVELOPMENT || DEBUG
	kern_return_t kr;

	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* reject out-of-range QoS or throttle tier values */
	if (qos < THREAD_QOS_DEFAULT || qos > THREAD_QOS_USER_INTERACTIVE ||
	    iotier < THROTTLE_LEVEL_START || iotier > THROTTLE_LEVEL_END) {
		return KERN_INVALID_ARGUMENT;
	}

	struct thread_attr_for_ipc_propagation attr = {
		.tafip_iotier = iotier,
		.tafip_qos = qos
	};

	/* Apply propagate attr to port */
	kr = ipc_port_propagate_thread_attr(send_port, attr);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* donate importance for the duration of the upcall only */
	thread_enable_send_importance(current_thread(), TRUE);

	/* Perform an async kernel upcall on the given send port */
	mach_test_async_upcall(send_port);
	thread_enable_send_importance(current_thread(), FALSE);

	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	(void)qos;
	(void)iotier;
	return KERN_NOT_SUPPORTED;
#endif
}
9770 
9771 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Allocate the current task's resource-limit fatal port and copy a send
 * right into the current IPC space.  Returns the resulting port name, or 0
 * if no fatal port could be allocated.
 */
mach_port_name_t
current_task_get_fatal_port_name(void)
{
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_port_name_t port_name = 0;

	task_fatal_port = task_allocate_fatal_port();

	if (task_fatal_port) {
		/* copyout consumes the send right produced above */
		ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
		    IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
	}

	return port_name;
}
9787 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
9788 
9789 #if defined(__x86_64__)
9790 bool
curtask_get_insn_copy_optout(void)9791 curtask_get_insn_copy_optout(void)
9792 {
9793 	bool optout;
9794 	task_t cur_task = current_task();
9795 
9796 	task_lock(cur_task);
9797 	optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9798 	task_unlock(cur_task);
9799 
9800 	return optout;
9801 }
9802 
/*
 * x86_64 only: opt the current task out of instruction-stream copying and
 * apply the opt-out to every existing thread under the task lock.
 */
void
curtask_set_insn_copy_optout(void)
{
	task_t cur_task = current_task();

	task_lock(cur_task);

	cur_task->t_flags |= TF_INSN_COPY_OPTOUT;

	/* propagate to all threads that already exist */
	thread_t thread;
	queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
		machine_thread_set_insn_copy_optout(thread);
	}
	task_unlock(cur_task);
}
9818 #endif /* defined(__x86_64__) */
9819 
9820 void
task_get_corpse_vmobject_list(task_t task,vmobject_list_output_t * list,size_t * list_size)9821 task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9822 {
9823 	assert(task);
9824 	assert(list_size);
9825 
9826 	*list = task->corpse_vmobject_list;
9827 	*list_size = (size_t)task->corpse_vmobject_list_size;
9828 }
9829 
/*
 * Panic helper: the proc_ro structure's task back-pointer does not match
 * the task that claims it — indicates corruption of read-only task state.
 */
__abortlike
static void
panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
{
	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
	    "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
}
9837 
/*
 * Return the task's read-only proc structure after integrity checks:
 * the pointer must live in the PROC_RO zone and its task back-pointer
 * must reference this task, else we panic.
 */
proc_ro_t
task_get_ro(task_t t)
{
	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;

	/* verify the pointer really is a PROC_RO zone element */
	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(proc_ro_task(ro) != t)) {
		panic_proc_ro_task_backref_mismatch(t, ro);
	}

	return ro;
}
9850 
9851 uint32_t
task_ro_flags_get(task_t task)9852 task_ro_flags_get(task_t task)
9853 {
9854 	return task_get_ro(task)->t_flags_ro;
9855 }
9856 
/* Atomically OR `flags` into the task's read-only flag word. */
void
task_ro_flags_set(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_OR_32, flags);
}
9863 
/* Atomically clear `flags` in the task's read-only flag word. */
void
task_ro_flags_clear(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
}
9870 
9871 task_control_port_options_t
task_get_control_port_options(task_t task)9872 task_get_control_port_options(task_t task)
9873 {
9874 	return task_get_ro(task)->task_control_port_options;
9875 }
9876 
/* Store the task's control-port options into its read-only state. */
void
task_set_control_port_options(task_t task, task_control_port_options_t opts)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_control_port_options, &opts);
}
9883 
9884 /*!
9885  * @function kdp_task_is_locked
9886  *
9887  * @abstract
9888  * Checks if task is locked.
9889  *
9890  * @discussion
9891  * NOT SAFE: To be used only by kernel debugger.
9892  *
9893  * @param task task to check
9894  *
9895  * @returns TRUE if the task is locked.
9896  */
9897 boolean_t
kdp_task_is_locked(task_t task)9898 kdp_task_is_locked(task_t task)
9899 {
9900 	return kdp_lck_mtx_lock_spin_is_acquired(&task->lock);
9901 }
9902 
9903 #if DEBUG || DEVELOPMENT
9904 /**
9905  *
9906  * Check if a threshold limit is valid based on the actual phys memory
9907  * limit. If they are same, race conditions may arise, so we have to prevent
9908  * it to happen.
9909  */
static diagthreshold_check_return
task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value)
{
	int phys_limit_mb;
	kern_return_t ret_value;
	bool threshold_enabled;
	bool dummy;
	ret_value = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, &threshold_enabled);
	if (ret_value != KERN_SUCCESS) {
		/* NOTE(review): a raw kern_return_t escapes here as a
		 * diagthreshold_check_return — confirm the enum accommodates it. */
		return ret_value;
	}
	/*
	 * Compare the new value against the "other" limit: when setting the
	 * diagnostics threshold, compare against the phys footprint limit,
	 * and vice versa.
	 */
	if (is_diagnostics_value == true) {
		ret_value = task_get_phys_footprint_limit(task, &phys_limit_mb);
	} else {
		uint64_t diag_limit;
		ret_value = task_get_diag_footprint_limit_internal(task, &diag_limit, &dummy);
		phys_limit_mb = (int)(diag_limit >> 20);   /* bytes -> MB */
	}
	if (ret_value != KERN_SUCCESS) {
		return ret_value;
	}
	/* equal limits could race each other — report which flag state applies */
	if (phys_limit_mb == (int)  new_limit) {
		if (threshold_enabled == false) {
			return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED;
		} else {
			return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED;
		}
	}
	if (threshold_enabled == false) {
		return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED;
	} else {
		return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED;
	}
}
9944 #endif
9945 
9946 #if CONFIG_EXCLAVES
/*
 * Attach `task` to the conclave named by task_conclave_id.
 * Caller (current task) needs the SPAWN privilege; the target binary
 * (identified by vnode/off) needs HOST or SPAWN.  Waits for exclaves boot
 * stage 2 before attaching.
 */
kern_return_t
task_add_conclave(task_t task, void *vnode, int64_t off, const char *task_conclave_id)
{
	/*
	 * Only launchd or properly entitled tasks can attach tasks to
	 * conclaves.
	 */
	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}

	/*
	 * Only entitled tasks can have conclaves attached.
	 * Allow tasks which have the SPAWN privilege to also host conclaves.
	 * This allows xpc proxy to add a conclave before execing a daemon.
	 */
	if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST) &&
	    !exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}

	/*
	 * Make this EXCLAVES_BOOT_STAGE_2 until userspace is actually
	 * triggering the EXCLAVESKIT boot stage.
	 */
	kern_return_t kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_2);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	return exclaves_conclave_attach(EXCLAVES_DOMAIN_KERNEL, task_conclave_id, task);
}
9979 
/*
 * Launch the conclave previously attached to the current task and mark the
 * task conclave-tainted.  Fails (KERN_FAILURE) when no conclave is attached.
 */
kern_return_t
task_launch_conclave(mach_port_name_t port __unused)
{
	kern_return_t kr = KERN_FAILURE;
	assert3u(port, ==, MACH_PORT_NULL);
	exclaves_resource_t *conclave = task_get_conclave(current_task());
	if (conclave == NULL) {
		return kr;
	}

	kr = exclaves_conclave_launch(conclave);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* taint only after a successful launch */
	task_set_conclave_taint(current_task());

	return KERN_SUCCESS;
}
/*
 * Move an attached conclave from old_task to new_task across exec.
 * No-op (KERN_SUCCESS) when old_task has no attached conclave.  Requires
 * the caller to hold the SPAWN privilege and the new binary (vnode/off)
 * to hold HOST.
 */
kern_return_t
task_inherit_conclave(task_t old_task, task_t new_task, void *vnode, int64_t off)
{
	if (old_task->conclave == NULL ||
	    !exclaves_conclave_is_attached(old_task->conclave)) {
		return KERN_SUCCESS;
	}

	/*
	 * Only launchd or properly entitled tasks can attach tasks to
	 * conclaves.
	 */
	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}

	/*
	 * Only entitled tasks can have conclaves attached.
	 */
	if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST)) {
		return KERN_DENIED;
	}

	return exclaves_conclave_inherit(old_task->conclave, old_task, new_task);
}
10024 
/*
 * Release the task's conclave state during teardown: free any saved
 * exclave crash-info buffer, then detach the conclave (if attached).
 */
void
task_clear_conclave(task_t task)
{
	/* Crash info is freed even if no conclave is currently attached. */
	if (task->exclave_crash_info) {
		kfree_data(task->exclave_crash_info, CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE);
		task->exclave_crash_info = NULL;
	}

	if (task->conclave == NULL) {
		return;
	}

	/*
	 * XXX
	 * This should only fail if either the conclave is in an unexpected
	 * state (i.e. not ATTACHED) or if the wrong port is supplied.
	 * We should re-visit this and make sure we guarantee the above
	 * constraints.
	 */
	__assert_only kern_return_t ret =
	    exclaves_conclave_detach(task->conclave, task);
	assert3u(ret, ==, KERN_SUCCESS);
}
10048 
/*
 * Stop the conclave attached to task, optionally gathering a crash
 * backtrace (gather_crash_bt).
 *
 * Panics if the task is conclave-tainted and taint panics are enabled
 * (see task_should_panic_on_exit_due_to_conclave_taint).
 */
void
task_stop_conclave(task_t task, bool gather_crash_bt)
{
	thread_t thread = current_thread();

	if (task->conclave == NULL) {
		return;
	}

	if (task_should_panic_on_exit_due_to_conclave_taint(task)) {
		panic("Conclave tainted task %p terminated\n", task);
	}

	/* Stash the task on current thread for conclave teardown */
	thread->conclave_stop_task = task;

	__assert_only kern_return_t ret =
	    exclaves_conclave_stop(task->conclave, gather_crash_bt);

	/* Clear the stash before asserting so it never leaks past this call. */
	thread->conclave_stop_task = TASK_NULL;

	assert3u(ret, ==, KERN_SUCCESS);
}
10072 
10073 kern_return_t
task_stop_conclave_upcall(void)10074 task_stop_conclave_upcall(void)
10075 {
10076 	task_t task = current_task();
10077 	if (task->conclave == NULL) {
10078 		return KERN_INVALID_TASK;
10079 	}
10080 
10081 	return exclaves_conclave_stop_upcall(task->conclave);
10082 }
10083 
/*
 * Complete a previously-delivered conclave stop upcall.
 *
 * A no-op (KERN_SUCCESS) unless the calling thread has a stop upcall
 * pending (TH_EXCLAVES_STOP_UPCALL_PENDING).
 */
kern_return_t
task_stop_conclave_upcall_complete(void)
{
	task_t task = current_task();
	thread_t thread = current_thread();

	if (!(thread->th_exclaves_state & TH_EXCLAVES_STOP_UPCALL_PENDING)) {
		return KERN_SUCCESS;
	}

	/* A pending stop upcall implies a conclave is attached. */
	assert3p(task->conclave, !=, NULL);

	return exclaves_conclave_stop_upcall_complete(task->conclave, task);
}
10098 
10099 kern_return_t
task_suspend_conclave_upcall(uint64_t * scid_list,size_t scid_list_count)10100 task_suspend_conclave_upcall(uint64_t *scid_list, size_t scid_list_count)
10101 {
10102 	task_t task = current_task();
10103 	thread_t thread;
10104 	int scid_count = 0;
10105 	kern_return_t kr;
10106 	if (task->conclave == NULL) {
10107 		return KERN_INVALID_TASK;
10108 	}
10109 
10110 	kr = task_hold_and_wait(task);
10111 
10112 	task_lock(task);
10113 	queue_iterate(&task->threads, thread, thread_t, task_threads)
10114 	{
10115 		if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
10116 			scid_list[scid_count++] = thread->th_exclaves_scheduling_context_id;
10117 			if (scid_count >= scid_list_count) {
10118 				break;
10119 			}
10120 		}
10121 	}
10122 
10123 	task_unlock(task);
10124 	return kr;
10125 }
10126 
10127 kern_return_t
task_crash_info_conclave_upcall(task_t task,const xnuupcalls_conclavesharedbuffer_s * shared_buf,uint32_t length)10128 task_crash_info_conclave_upcall(task_t task, const xnuupcalls_conclavesharedbuffer_s *shared_buf,
10129     uint32_t length)
10130 {
10131 	if (task->conclave == NULL) {
10132 		return KERN_INVALID_TASK;
10133 	}
10134 
10135 	/* Allocate the buffer and memcpy it */
10136 	int task_crash_info_buffer_size = 0;
10137 	uint8_t * task_crash_info_buffer;
10138 
10139 	if (!length) {
10140 		printf("Conclave upcall: task_crash_info_conclave_upcall did not return any page addresses\n");
10141 		return KERN_INVALID_ARGUMENT;
10142 	}
10143 
10144 	task_crash_info_buffer_size = CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE;
10145 	assert3u(task_crash_info_buffer_size, >=, length);
10146 
10147 	task_crash_info_buffer = kalloc_data(task_crash_info_buffer_size, Z_WAITOK);
10148 	if (!task_crash_info_buffer) {
10149 		panic("task_crash_info_conclave_upcall: cannot allocate buffer for task_info shared memory");
10150 		return KERN_INVALID_ARGUMENT;
10151 	}
10152 
10153 	uint8_t * dst = task_crash_info_buffer;
10154 	uint32_t remaining = length;
10155 	for (size_t i = 0; i < CONCLAVE_CRASH_BUFFER_PAGECOUNT; i++) {
10156 		if (remaining) {
10157 			memcpy(dst, (uint8_t*)phystokv((pmap_paddr_t)shared_buf->physaddr[i]), PAGE_SIZE);
10158 			remaining = (remaining >= PAGE_SIZE) ? remaining - PAGE_SIZE : 0;
10159 			dst += PAGE_SIZE;
10160 		}
10161 	}
10162 
10163 	task_lock(task);
10164 	if (task->exclave_crash_info == NULL && task->active) {
10165 		task->exclave_crash_info = task_crash_info_buffer;
10166 		task->exclave_crash_info_length = length;
10167 		task_crash_info_buffer = NULL;
10168 	}
10169 	task_unlock(task);
10170 
10171 	if (task_crash_info_buffer) {
10172 		kfree_data(task_crash_info_buffer, task_crash_info_buffer_size);
10173 	}
10174 
10175 	return KERN_SUCCESS;
10176 }
10177 
10178 exclaves_resource_t *
task_get_conclave(task_t task)10179 task_get_conclave(task_t task)
10180 {
10181 	return task->conclave;
10182 }
10183 
10184 extern boolean_t IOPMRootDomainGetWillShutdown(void);
10185 
TUNABLE(bool, disable_conclave_taint, "disable_conclave_taint", true); /* Do not taint processes when they talk to a conclave, so the system does not panic when they exit. */
10187 
10188 static bool
task_should_panic_on_exit_due_to_conclave_taint(task_t task)10189 task_should_panic_on_exit_due_to_conclave_taint(task_t task)
10190 {
10191 	/* Check if boot-arg to disable conclave taint is set */
10192 	if (disable_conclave_taint) {
10193 		return false;
10194 	}
10195 
10196 	/* Check if the system is shutting down */
10197 	if (IOPMRootDomainGetWillShutdown()) {
10198 		return false;
10199 	}
10200 
10201 	return task_is_conclave_tainted(task);
10202 }
10203 
10204 static bool
task_is_conclave_tainted(task_t task)10205 task_is_conclave_tainted(task_t task)
10206 {
10207 	return (task->t_exclave_state & TES_CONCLAVE_TAINTED) != 0 &&
10208 	       !(task->t_exclave_state & TES_CONCLAVE_UNTAINTABLE);
10209 }
10210 
/*
 * Mark the task as conclave-tainted (it has communicated with a launched
 * conclave).  Atomic so callers need not hold the task lock.
 */
static void
task_set_conclave_taint(task_t task)
{
	os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_TAINTED, relaxed);
}
10216 
/*
 * Exempt the task from conclave taint: even if the TAINTED bit is set,
 * task_is_conclave_tainted() will report false.  Atomic so callers need
 * not hold the task lock.
 */
void
task_set_conclave_untaintable(task_t task)
{
	os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_UNTAINTABLE, relaxed);
}
10222 
10223 void
task_add_conclave_crash_info(task_t task,void * crash_info_ptr)10224 task_add_conclave_crash_info(task_t task, void *crash_info_ptr)
10225 {
10226 	__block kern_return_t error = KERN_SUCCESS;
10227 	tb_error_t tberr = TB_ERROR_SUCCESS;
10228 	void *crash_info;
10229 	uint32_t crash_info_length = 0;
10230 
10231 	if (task->conclave == NULL) {
10232 		return;
10233 	}
10234 
10235 	if (task->exclave_crash_info_length == 0) {
10236 		return;
10237 	}
10238 
10239 	error = kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
10240 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
10241 	if (error != KERN_SUCCESS) {
10242 		return;
10243 	}
10244 
10245 	crash_info = task->exclave_crash_info;
10246 	crash_info_length = task->exclave_crash_info_length;
10247 
10248 	tberr = stackshot_stackshotresult__unmarshal(crash_info,
10249 	    (uint64_t)crash_info_length, ^(stackshot_stackshotresult_s result){
10250 		error = stackshot_exclaves_process_stackshot(&result, crash_info_ptr);
10251 		if (error != KERN_SUCCESS) {
10252 		        printf("stackshot_exclaves_process_result: error processing stackshot result %d\n", error);
10253 		}
10254 	});
10255 	if (tberr != TB_ERROR_SUCCESS) {
10256 		printf("task_conclave_crash: task_add_conclave_crash_info could not unmarshal stackshot data 0x%x\n", tberr);
10257 		error = KERN_FAILURE;
10258 		goto error_exit;
10259 	}
10260 
10261 error_exit:
10262 	kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_END,
10263 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
10264 
10265 	return;
10266 }
10267 
10268 #endif /* CONFIG_EXCLAVES */
10269 
10270 #pragma mark task utils
10271 
10272 /* defined in bsd/kern/kern_proc.c */
10273 extern void proc_name(int pid, char *buf, int size);
10274 extern char *proc_best_name(struct proc *p);
10275 
10276 void
task_procname(task_t task,char * buf,int size)10277 task_procname(task_t task, char *buf, int size)
10278 {
10279 	proc_name(task_pid(task), buf, size);
10280 }
10281 
10282 void
task_best_name(task_t task,char * buf,size_t size)10283 task_best_name(task_t task, char *buf, size_t size)
10284 {
10285 	char *name = proc_best_name(task_get_proc_raw(task));
10286 	strlcpy(buf, name, size);
10287 }
10288