xref: /xnu-8019.80.24/osfmk/kern/task.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_FREE_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/task.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59  *		David Black
60  *
61  *	Task management primitives implementation.
62  */
63 /*
64  * Copyright (c) 1993 The University of Utah and
65  * the Computer Systems Laboratory (CSL).  All rights reserved.
66  *
67  * Permission to use, copy, modify and distribute this software and its
68  * documentation is hereby granted, provided that both the copyright
69  * notice and this permission notice appear in all copies of the
70  * software, derivative works or modified versions, and any portions
71  * thereof, and that both notices appear in supporting documentation.
72  *
73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76  *
77  * CSL requests users of this software to return to [email protected] any
78  * improvements that they make and grant CSL redistribution rights.
79  *
80  */
81 /*
82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83  * support for mandatory and extensible security protections.  This notice
84  * is included in support of clause 2.2 (b) of the Apple Public License,
85  * Version 2.0.
86  * Copyright (c) 2005 SPARTA, Inc.
87  */
88 
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100 #include <mach/mach_test_upcall.h>
101 
102 #include <ipc/ipc_importance.h>
103 #include <ipc/ipc_types.h>
104 #include <ipc/ipc_space.h>
105 #include <ipc/ipc_entry.h>
106 #include <ipc/ipc_hash.h>
107 #include <ipc/ipc_init.h>
108 
109 #include <kern/kern_types.h>
110 #include <kern/mach_param.h>
111 #include <kern/misc_protos.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/coalition.h>
115 #include <kern/zalloc.h>
116 #include <kern/kalloc.h>
117 #include <kern/kern_cdata.h>
118 #include <kern/processor.h>
119 #include <kern/sched_prim.h>    /* for thread_wakeup */
120 #include <kern/ipc_tt.h>
121 #include <kern/host.h>
122 #include <kern/clock.h>
123 #include <kern/timer.h>
124 #include <kern/assert.h>
125 #include <kern/affinity.h>
126 #include <kern/exc_resource.h>
127 #include <kern/machine.h>
128 #include <kern/policy_internal.h>
129 #include <kern/restartable.h>
130 #include <kern/ipc_kobject.h>
131 
132 #include <corpses/task_corpse.h>
133 #if CONFIG_TELEMETRY
134 #include <kern/telemetry.h>
135 #endif
136 
137 #if MONOTONIC
138 #include <kern/monotonic.h>
139 #include <machine/monotonic.h>
140 #endif /* MONOTONIC */
141 
142 #include <os/log.h>
143 
144 #include <vm/pmap.h>
145 #include <vm/vm_map.h>
146 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
147 #include <vm/vm_pageout.h>
148 #include <vm/vm_protos.h>
149 #include <vm/vm_purgeable_internal.h>
150 #include <vm/vm_compressor_pager.h>
151 
152 #include <sys/proc_ro.h>
153 #include <sys/resource.h>
154 #include <sys/signalvar.h> /* for coredump */
155 #include <sys/bsdtask_info.h>
156 /*
157  * Exported interfaces
158  */
159 
160 #include <mach/task_server.h>
161 #include <mach/mach_host_server.h>
162 #include <mach/mach_port_server.h>
163 
164 #include <vm/vm_shared_region.h>
165 
166 #include <libkern/OSDebug.h>
167 #include <libkern/OSAtomic.h>
168 #include <libkern/section_keywords.h>
169 
170 #include <mach-o/loader.h>
171 #include <kdp/kdp_dyld.h>
172 
173 #include <kern/sfi.h>           /* picks up ledger.h */
174 
175 #if CONFIG_MACF
176 #include <security/mac_mach_internal.h>
177 #endif
178 
179 #include <IOKit/IOBSD.h>
180 
181 #if KPERF
182 extern int kpc_force_all_ctrs(task_t, int);
183 #endif
184 
185 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
186 
187 int64_t         next_taskuniqueid = 0;
188 
189 static SECURITY_READ_ONLY_LATE(zone_t) task_zone;
190 ZONE_INIT(&task_zone, "tasks", sizeof(struct task),
191     ZC_ZFREE_CLEARMEM, ZONE_ID_TASK, NULL);
192 
193 extern uint32_t ipc_control_port_options;
194 
195 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
196 extern void task_disown_frozen_csegs(task_t owner_task);
197 
198 static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
199 static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
200 static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
201 
202 IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
203 IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
204     .iko_op_no_senders = task_port_no_senders);
205 IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
206     .iko_op_no_senders = task_port_with_flavor_no_senders);
207 IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
208     .iko_op_no_senders = task_port_with_flavor_no_senders);
209 IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
210     .iko_op_no_senders = task_suspension_no_senders);
211 
212 #if CONFIG_PROC_RESOURCE_LIMITS
213 static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
214 static mach_port_t task_allocate_fatal_port(void);
215 
216 IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
217     .iko_op_stable     = true,
218     .iko_op_no_senders = task_fatal_port_no_senders);
219 
220 extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
221 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
222 
223 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
224 int audio_active = 0;
225 
226 /*
227  *	structure for tracking zone usage
228  *	Used either one per task/thread for all zones or <per-task,per-zone>.
229  */
230 typedef struct zinfo_usage_store_t {
231 	/* These fields may be updated atomically, and so must be 8 byte aligned */
232 	uint64_t        alloc __attribute__((aligned(8)));              /* allocation counter */
233 	uint64_t        free __attribute__((aligned(8)));               /* free counter */
234 } zinfo_usage_store_t;
235 
236 zinfo_usage_store_t tasks_tkm_private;
237 zinfo_usage_store_t tasks_tkm_shared;
238 
239 /* A container to accumulate statistics for expired tasks */
240 expired_task_statistics_t               dead_task_statistics;
241 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
242 
243 ledger_template_t task_ledger_template = NULL;
244 
245 /* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
246 LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
247 LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
248 
249 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
250 {.cpu_time = -1,
251  .tkm_private = -1,
252  .tkm_shared = -1,
253  .phys_mem = -1,
254  .wired_mem = -1,
255  .internal = -1,
256  .iokit_mapped = -1,
257  .external = -1,
258  .reusable = -1,
259  .alternate_accounting = -1,
260  .alternate_accounting_compressed = -1,
261  .page_table = -1,
262  .phys_footprint = -1,
263  .internal_compressed = -1,
264  .purgeable_volatile = -1,
265  .purgeable_nonvolatile = -1,
266  .purgeable_volatile_compressed = -1,
267  .purgeable_nonvolatile_compressed = -1,
268  .tagged_nofootprint = -1,
269  .tagged_footprint = -1,
270  .tagged_nofootprint_compressed = -1,
271  .tagged_footprint_compressed = -1,
272  .network_volatile = -1,
273  .network_nonvolatile = -1,
274  .network_volatile_compressed = -1,
275  .network_nonvolatile_compressed = -1,
276  .media_nofootprint = -1,
277  .media_footprint = -1,
278  .media_nofootprint_compressed = -1,
279  .media_footprint_compressed = -1,
280  .graphics_nofootprint = -1,
281  .graphics_footprint = -1,
282  .graphics_nofootprint_compressed = -1,
283  .graphics_footprint_compressed = -1,
284  .neural_nofootprint = -1,
285  .neural_footprint = -1,
286  .neural_nofootprint_compressed = -1,
287  .neural_footprint_compressed = -1,
288  .platform_idle_wakeups = -1,
289  .interrupt_wakeups = -1,
290 #if CONFIG_SCHED_SFI
291  .sfi_wait_times = { 0 /* initialized at runtime */},
292 #endif /* CONFIG_SCHED_SFI */
293  .cpu_time_billed_to_me = -1,
294  .cpu_time_billed_to_others = -1,
295  .physical_writes = -1,
296  .logical_writes = -1,
297  .logical_writes_to_external = -1,
298 #if DEBUG || DEVELOPMENT
299  .pages_grabbed = -1,
300  .pages_grabbed_kern = -1,
301  .pages_grabbed_iopl = -1,
302  .pages_grabbed_upl = -1,
303 #endif
304 #if CONFIG_FREEZE
305  .frozen_to_swap = -1,
306 #endif /* CONFIG_FREEZE */
307  .energy_billed_to_me = -1,
308  .energy_billed_to_others = -1,
309 #if CONFIG_PHYS_WRITE_ACCT
310  .fs_metadata_writes = -1,
311 #endif /* CONFIG_PHYS_WRITE_ACCT */
312 #if CONFIG_MEMORYSTATUS
313  .memorystatus_dirty_time = -1,
314 #endif /* CONFIG_MEMORYSTATUS */
315  .swapins = -1, };
316 
317 /* System sleep state */
318 boolean_t tasks_suspend_state;
319 
320 
321 void init_task_ledgers(void);
322 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
323 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
324 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
325 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
326 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
327 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
328 #if CONFIG_PROC_RESOURCE_LIMITS
329 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
330 mach_port_name_t current_task_get_fatal_port_name(void);
331 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
332 
333 kern_return_t task_suspend_internal(task_t);
334 kern_return_t task_resume_internal(task_t);
335 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
336 
337 extern kern_return_t iokit_task_terminate(task_t task);
338 extern void          iokit_task_app_suspended_changed(task_t task);
339 
340 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
341 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
342 extern kern_return_t thread_resume(thread_t thread);
343 
344 extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
345 
346 // Warn tasks when they hit 80% of their memory limit.
347 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
348 
349 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
350 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
351 
352 /*
353  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
354  *
355  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
356  *  stacktraces, aka micro-stackshots)
357  */
358 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
359 
360 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
361 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
362 
363 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
364 
365 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
366 
367 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
368 unsigned int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
369 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
370 
371 /* I/O Monitor Limits */
372 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
373 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
374 
375 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
376 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
377 
378 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
379 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
380 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
381 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage*/
382 static boolean_t global_update_logical_writes(int64_t, int64_t*);
383 
384 #define TASK_MAX_THREAD_LIMIT 256
385 
386 #if MACH_ASSERT
387 int pmap_ledgers_panic = 1;
388 int pmap_ledgers_panic_leeway = 3;
389 #endif /* MACH_ASSERT */
390 
391 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
392 
393 #if CONFIG_COREDUMP
394 int hwm_user_cores = 0; /* high watermark violations generate user core files */
395 #endif
396 
397 #ifdef MACH_BSD
398 extern uint32_t proc_platform(const struct proc *);
399 extern uint32_t proc_min_sdk(struct proc *);
400 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
401 extern int      proc_pid(struct proc *p);
402 extern int      proc_selfpid(void);
403 extern struct proc *current_proc(void);
404 extern char     *proc_name_address(struct proc *p);
405 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
406 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
407 extern void workq_proc_suspended(struct proc *p);
408 extern void workq_proc_resumed(struct proc *p);
409 
410 #if CONFIG_MEMORYSTATUS
411 extern void     proc_memstat_skip(struct proc* p, boolean_t set);
412 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
413 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
414 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
415 extern uint64_t  memorystatus_available_memory_internal(struct proc *p);
416 
417 #if DEVELOPMENT || DEBUG
418 extern void memorystatus_abort_vm_map_fork(task_t);
419 #endif
420 
421 #endif /* CONFIG_MEMORYSTATUS */
422 
423 #endif /* MACH_BSD */
424 
425 #if DEVELOPMENT || DEBUG
426 int exc_resource_threads_enabled;
427 #endif /* DEVELOPMENT || DEBUG */
428 
429 /* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
430 static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
431 
432 /*
433  * Defaults for controllable EXC_GUARD behaviors
434  *
435  * Internal builds are fatal by default (except BRIDGE).
436  * Create an alternate set of defaults for special processes by name.
437  */
/* Maps a process name to a non-default EXC_GUARD behavior (see list below). */
struct task_exc_guard_named_default {
	char *name;         /* process name to match against */
	uint32_t behavior;  /* TASK_EXC_GUARD_* behavior bits to apply */
};
442 #define _TASK_EXC_GUARD_MP_CORPSE  (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
443 #define _TASK_EXC_GUARD_MP_ONCE    (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
444 #define _TASK_EXC_GUARD_MP_FATAL   (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)
445 
/*
 * VM guard-exception behavior presets, mirroring the MP presets above.
 * Fix: _TASK_EXC_GUARD_VM_CORPSE previously OR'ed TASK_EXC_GUARD_VM_ONCE
 * instead of TASK_EXC_GUARD_VM_CORPSE, dropping the corpse bit and making
 * the CORPSE and ONCE presets indistinguishable.
 */
#define _TASK_EXC_GUARD_VM_CORPSE  (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE)
#define _TASK_EXC_GUARD_VM_ONCE    (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_VM_FATAL   (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)
449 
450 #define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
451 #define _TASK_EXC_GUARD_ALL_ONCE   (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
452 #define _TASK_EXC_GUARD_ALL_FATAL  (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
453 
454 /* cannot turn off FATAL and DELIVER bit if set */
455 uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL | TASK_EXC_GUARD_VM_FATAL |
456     TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_VM_DELIVER;
457 /* cannot turn on ONCE bit if unset */
458 uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_VM_ONCE;
459 
460 #if !defined(XNU_TARGET_OS_BRIDGE)
461 
462 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
463 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
464 /*
465  * These "by-process-name" default overrides are intended to be a short-term fix to
466  * quickly get over races between changes introducing new EXC_GUARD raising behaviors
467  * in some process and a change in default behavior for same. We should ship with
468  * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
469  * exception behavior via task_set_exc_guard_behavior()).
470  *
471  * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
472  * task_exc_guard_default when transitioning this list between empty and
473  * non-empty.
474  */
475 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
476 
477 #else /* !defined(XNU_TARGET_OS_BRIDGE) */
478 
479 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
480 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
481 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
482 
483 #endif /* !defined(XNU_TARGET_OS_BRIDGE) */
484 
485 /* Forwards */
486 
487 static void task_hold_locked(task_t task);
488 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
489 static void task_release_locked(task_t task);
490 
491 static void task_synchronizer_destroy_all(task_t task);
492 static os_ref_count_t
493 task_add_turnstile_watchports_locked(
494 	task_t                      task,
495 	struct task_watchports      *watchports,
496 	struct task_watchport_elem  **previous_elem_array,
497 	ipc_port_t                  *portwatch_ports,
498 	uint32_t                    portwatch_count);
499 
500 static os_ref_count_t
501 task_remove_turnstile_watchports_locked(
502 	task_t                 task,
503 	struct task_watchports *watchports,
504 	ipc_port_t             *port_freelist);
505 
506 static struct task_watchports *
507 task_watchports_alloc_init(
508 	task_t        task,
509 	thread_t      thread,
510 	uint32_t      count);
511 
512 static void
513 task_watchports_deallocate(
514 	struct task_watchports *watchports);
515 
/*
 * task_set_64bit:
 *
 * Set the task's address-space width (is_64bit) and register-state width
 * (is_64bit_data) flags under the task lock, and resynchronize each
 * thread's machine state when the data width actually changes.
 */
void
task_set_64bit(
	task_t task,
	boolean_t is_64bit,
	boolean_t is_64bit_data)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
	thread_t thread;
#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */

	task_lock(task);

	/*
	 * Switching to/from 64-bit address spaces
	 */
	if (is_64bit) {
		if (!task_has_64Bit_addr(task)) {
			task_set_64Bit_addr(task);
		}
	} else {
		if (task_has_64Bit_addr(task)) {
			task_clear_64Bit_addr(task);
		}
	}

	/*
	 * Switching to/from 64-bit register state.
	 */
	if (is_64bit_data) {
		if (task_has_64Bit_data(task)) {
			/* No change in data width: skip the per-thread resync. */
			goto out;
		}

		task_set_64Bit_data(task);
	} else {
		if (!task_has_64Bit_data(task)) {
			goto out;
		}

		task_clear_64Bit_data(task);
	}

	/* FIXME: On x86, the thread save state flavor can diverge from the
	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
	 * state dichotomy. Since we can be pre-empted in this interval,
	 * certain routines may observe the thread as being in an inconsistent
	 * state with respect to its task's 64-bitness.
	 */

#if defined(__x86_64__) || defined(__arm64__)
	/* Flip every thread's save-state flavor to match the new data width. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		machine_thread_switch_addrmode(thread);
		thread_mtx_unlock(thread);
	}
#endif /* defined(__x86_64__) || defined(__arm64__) */

out:
	task_unlock(task);
}
576 
/* Return whether the task currently has a 64-bit address space. */
bool
task_get_64bit_addr(task_t task)
{
	return task_has_64Bit_addr(task);
}
582 
/* Return whether the task currently uses 64-bit register state. */
bool
task_get_64bit_data(task_t task)
{
	return task_has_64Bit_data(task);
}
588 
589 void
task_set_platform_binary(task_t task,boolean_t is_platform)590 task_set_platform_binary(
591 	task_t task,
592 	boolean_t is_platform)
593 {
594 	task_lock(task);
595 	if (is_platform) {
596 		task->t_flags |= TF_PLATFORM;
597 	} else {
598 		task->t_flags &= ~(TF_PLATFORM);
599 	}
600 	task_unlock(task);
601 }
602 
/* Make the task's control port immovable/pinned; forwards to the IPC layer. */
void
task_set_immovable_pinned(task_t task)
{
	ipc_task_set_immovable_pinned(task);
}
608 
609 /*
610  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
611  * Returns "false" if flag is already set, and "true" in other cases.
612  */
613 bool
task_set_ca_client_wi(task_t task,boolean_t set_or_clear)614 task_set_ca_client_wi(
615 	task_t task,
616 	boolean_t set_or_clear)
617 {
618 	bool ret = true;
619 	task_lock(task);
620 	if (set_or_clear) {
621 		/* Tasks can have only one CA_CLIENT work interval */
622 		if (task->t_flags & TF_CA_CLIENT_WI) {
623 			ret = false;
624 		} else {
625 			task->t_flags |= TF_CA_CLIENT_WI;
626 		}
627 	} else {
628 		task->t_flags &= ~TF_CA_CLIENT_WI;
629 	}
630 	task_unlock(task);
631 	return ret;
632 }
633 
634 void
task_set_dyld_info(task_t task,mach_vm_address_t addr,mach_vm_size_t size)635 task_set_dyld_info(
636 	task_t task,
637 	mach_vm_address_t addr,
638 	mach_vm_size_t size)
639 {
640 	task_lock(task);
641 	task->all_image_info_addr = addr;
642 	task->all_image_info_size = size;
643 	task_unlock(task);
644 }
645 
/* Record the VM address of the main executable's Mach-O header. */
void
task_set_mach_header_address(
	task_t task,
	mach_vm_address_t addr)
{
	task_lock(task);
	task->mach_header_vm_address = addr;
	task_unlock(task);
}
655 
656 void
task_bank_reset(__unused task_t task)657 task_bank_reset(__unused task_t task)
658 {
659 	if (task->bank_context != NULL) {
660 		bank_task_destroy(task);
661 	}
662 }
663 
664 /*
665  * NOTE: This should only be called when the P_LINTRANSIT
666  *	 flag is set (the proc_trans lock is held) on the
667  *	 proc associated with the task.
668  */
/*
 * Create the task's bank context; panics if one already exists, since
 * initialization must happen exactly once per task.
 */
void
task_bank_init(__unused task_t task)
{
	if (task->bank_context != NULL) {
		panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
	}
	bank_task_initialize(task);
}
677 
/* Mark that this task has successfully performed an exec. */
void
task_set_did_exec_flag(task_t task)
{
	task->t_procflags |= TPF_DID_EXEC;
}
683 
/* Clear the flag marking this task as a transient exec-copy task. */
void
task_clear_exec_copy_flag(task_t task)
{
	task->t_procflags &= ~TPF_EXEC_COPY;
}
689 
/* Wait event used by task_wait_to_return()/task_clear_return_wait(). */
event_t
task_get_return_wait_event(task_t task)
{
	return (event_t)&task->returnwait_inheritor;
}
695 
/*
 * task_clear_return_wait:
 *
 * Release threads parked in task_wait_to_return().  TCRW_CLEAR_INITIAL_WAIT
 * wakes sleepers on the plain wait event; TCRW_CLEAR_FINAL_WAIT clears
 * TRW_LRETURNWAIT under the ipc-space write lock and, if a waiter is
 * recorded, wakes it through the turnstile so any priority push is undone.
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		/* The ipc-space write lock serializes with task_wait_to_return(). */
		is_write_lock(task->itk_space);

		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
			    NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);

			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, 0);

			/* Drop the inheritor so any boosted priority is returned. */
			turnstile_update_inheritor(turnstile, NULL,
			    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
728 
/*
 * task_wait_to_return:
 *
 * Park the calling thread until task_clear_return_wait() clears
 * TRW_LRETURNWAIT for the current task, pushing our priority onto the
 * recorded inheritor via a turnstile while we wait.  On release, jumps
 * to userspace via thread_bootstrap_return() — never returns to caller.
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();

	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
		    NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);

		do {
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			/* Push our priority onto whoever will clear the wait. */
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			is_write_lock(task->itk_space);
			/* Re-check under the lock: the wakeup may not be final. */
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
	}

	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process to execute any code,
	 * notify any interested parties.
	 */
	mac_proc_notify_exec_complete(current_proc());
#endif

	thread_bootstrap_return();
}
775 
776 #ifdef CONFIG_32BIT_TELEMETRY
777 boolean_t
task_consume_32bit_log_flag(task_t task)778 task_consume_32bit_log_flag(task_t task)
779 {
780 	if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
781 		task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
782 		return TRUE;
783 	} else {
784 		return FALSE;
785 	}
786 }
787 
/* Request that 32-bit telemetry be logged for this task. */
void
task_set_32bit_log_flag(task_t task)
{
	task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
}
793 #endif /* CONFIG_32BIT_TELEMETRY */
794 
/* Return whether this task is a transient exec-copy task. */
boolean_t
task_is_exec_copy(task_t task)
{
	return task_is_exec_copy_internal(task);
}
800 
/* Return whether this task has already performed an exec. */
boolean_t
task_did_exec(task_t task)
{
	return task_did_exec_internal(task);
}
806 
/* Return whether the task is still active (not terminated). */
boolean_t
task_is_active(task_t task)
{
	return task->active;
}
812 
/* Return whether the task is in the process of halting. */
boolean_t
task_is_halting(task_t task)
{
	return task->halting;
}
818 
/*
 * task_init:
 *
 * One-time bootstrap of the task subsystem, called during kernel startup.
 * Parses the boot-args / device tree for per-task resource-limit tunables
 * (physical memory footprint, wakeups monitor, I/O monitor), sets up the
 * task ledger template (unless coalitions already did), and finally creates
 * kernel_task itself as the first task in the system.
 */
void
task_init(void)
{
	/*
	 * Configure per-task memory limit.
	 * The boot-arg is interpreted as Megabytes,
	 * and takes precedence over the device tree.
	 * Setting the boot-arg to 0 disables task limits.
	 */
	if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
	    sizeof(max_task_footprint_mb))) {
		/*
		 * No limit was found in boot-args, so go look in the device tree.
		 */
		if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
		    sizeof(max_task_footprint_mb))) {
			/*
			 * No limit was found in device tree.
			 */
			max_task_footprint_mb = 0;
		}
	}

	if (max_task_footprint_mb != 0) {
#if CONFIG_MEMORYSTATUS
		/* Clamp unreasonably small limits up to a 50 MB floor. */
		if (max_task_footprint_mb < 50) {
			printf("Warning: max_task_pmem %d below minimum.\n",
			    max_task_footprint_mb);
			max_task_footprint_mb = 50;
		}
		printf("Limiting task physical memory footprint to %d MB\n",
		    max_task_footprint_mb);

		max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024;         // Convert MB to bytes

		/*
		 * Configure the per-task memory limit warning level.
		 * This is computed as a percentage.
		 */
		max_task_footprint_warning_level = 0;

		if (max_mem < 0x40000000) {
			/*
			 * On devices with < 1GB of memory:
			 *    -- set warnings to 50MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 50) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
			}
		} else {
			/*
			 * On devices with >= 1GB of memory:
			 *    -- set warnings to 100MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 100) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
			}
		}

		/*
		 * Never allow warning level to land below the default.
		 */
		if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
			max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
		}

		printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);

#else
		/* Footprint limits are enforced by jetsam; without it the tunable is meaningless. */
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
#endif /* CONFIG_MEMORYSTATUS */
	}

#if DEVELOPMENT || DEBUG
	/* EXC_RESOURCE for runaway thread creation is on by default on dev/debug kernels. */
	if (!PE_parse_boot_argn("exc_resource_threads",
	    &exc_resource_threads_enabled,
	    sizeof(exc_resource_threads_enabled))) {
		exc_resource_threads_enabled = 1;
	}
	PE_parse_boot_argn("task_exc_guard_default",
	    &task_exc_guard_default,
	    sizeof(task_exc_guard_default));
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_COREDUMP
	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
	    sizeof(hwm_user_cores))) {
		hwm_user_cores = 0;
	}
#endif

	proc_init_cpumon_params();

	/* CPU wakeups monitor tunables: rate, observation interval, stackshot trigger. */
	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
	    sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
	}

	if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
	    sizeof(disable_exc_resource))) {
		disable_exc_resource = 0;
	}

	/* I/O monitor tunables. */
	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
		task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
		task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
		io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
	}

/*
 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 * sets up the ledgers for the default coalition. If we don't have coalitions,
 * then we have to call it now.
 */
#if CONFIG_COALITIONS
	assert(task_ledger_template);
#else /* CONFIG_COALITIONS */
	init_task_ledgers();
#endif /* CONFIG_COALITIONS */

	task_ref_init();

	/*
	 * Create the kernel task as the first task.
	 * On LP64 kernels it is created as a 64-bit task (addr and data).
	 */
#ifdef __LP64__
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
#else
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
#endif
	{ panic("task_init");}

#if defined(HAS_APPLE_PAC)
	kernel_task->rop_pid = ml_default_rop_pid();
	kernel_task->jop_pid = ml_default_jop_pid();
	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
	ml_task_set_disable_user_jop(kernel_task, FALSE);
#endif

	/*
	 * Replace the fresh map task_create_internal built with the real
	 * kernel_map; the temporary map's reference is dropped first.
	 */
	vm_map_deallocate(kernel_task->map);
	kernel_task->map = kernel_map;
}
976 
977 /*
978  * Create a task running in the kernel address space.  It may
979  * have its own map of size mem_size and may have ipc privileges.
980  */
981 kern_return_t
kernel_task_create(__unused task_t parent_task,__unused vm_offset_t map_base,__unused vm_size_t map_size,__unused task_t * child_task)982 kernel_task_create(
983 	__unused task_t         parent_task,
984 	__unused vm_offset_t            map_base,
985 	__unused vm_size_t              map_size,
986 	__unused task_t         *child_task)
987 {
988 	return KERN_INVALID_ARGUMENT;
989 }
990 
991 kern_return_t
task_create(task_t parent_task,__unused ledger_port_array_t ledger_ports,__unused mach_msg_type_number_t num_ledger_ports,__unused boolean_t inherit_memory,__unused task_t * child_task)992 task_create(
993 	task_t                          parent_task,
994 	__unused ledger_port_array_t    ledger_ports,
995 	__unused mach_msg_type_number_t num_ledger_ports,
996 	__unused boolean_t              inherit_memory,
997 	__unused task_t                 *child_task)        /* OUT */
998 {
999 	if (parent_task == TASK_NULL) {
1000 		return KERN_INVALID_ARGUMENT;
1001 	}
1002 
1003 	/*
1004 	 * No longer supported: too many calls assume that a task has a valid
1005 	 * process attached.
1006 	 */
1007 	return KERN_FAILURE;
1008 }
1009 
1010 /*
1011  * Task ledgers
1012  * ------------
1013  *
1014  * phys_footprint
1015  *   Physical footprint: This is the sum of:
1016  *     + (internal - alternate_accounting)
1017  *     + (internal_compressed - alternate_accounting_compressed)
1018  *     + iokit_mapped
1019  *     + purgeable_nonvolatile
1020  *     + purgeable_nonvolatile_compressed
1021  *     + page_table
1022  *
1023  * internal
1024  *   The task's anonymous memory, which on iOS is always resident.
1025  *
1026  * internal_compressed
1027  *   Amount of this task's internal memory which is held by the compressor.
1028  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1029  *   and could be either decompressed back into memory, or paged out to storage, depending
1030  *   on our implementation.
1031  *
 * iokit_mapped
 *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 *   clean/dirty or internal/external state.
1035  *
1036  * alternate_accounting
1037  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1038  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1039  *   double counting.
1040  *
1041  * pages_grabbed
1042  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1043  *   which track UPL, IOPL and Kernel page grabs.
1044  */
/*
 * init_task_ledgers:
 *
 * Build the global per-task ledger template.  Every task ledger is later
 * instantiated from this template (see task_create_internal).  Must run
 * exactly once, before any task -- including kernel_task -- exists.
 * Entries are registered here for CPU time, memory accounting, wakeups,
 * I/O, SFI wait times, and energy; callbacks for footprint/wakeups/I/O
 * limit enforcement are attached before the template is sealed.
 */
void
init_task_ledgers(void)
{
	ledger_template_t t;

	assert(task_ledger_template == NULL);
	assert(kernel_task == TASK_NULL);

#if MACH_ASSERT
	/* Optional boot-args turn negative-balance detection into panics. */
	PE_parse_boot_argn("pmap_ledgers_panic",
	    &pmap_ledgers_panic,
	    sizeof(pmap_ledgers_panic));
	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
	    &pmap_ledgers_panic_leeway,
	    sizeof(pmap_ledgers_panic_leeway));
#endif /* MACH_ASSERT */

	if ((t = ledger_template_create("Per-task ledger")) == NULL) {
		panic("couldn't create task ledger template");
	}

	/*
	 * Register each ledger entry; ledger_entry_add returns the entry's
	 * index in the template, or a negative value on failure (checked in
	 * one combined test after all registrations).
	 */
	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
	    "physmem", "bytes");
	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
	    "bytes");
	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
	    "bytes");
	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
	    "bytes");
	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
	    "bytes");
	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(t, "iokit_mapped", "mappings",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(t, "alternate_accounting", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(t, "alternate_accounting_compressed", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.page_table = ledger_entry_add_with_flags(t, "page_table", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
	    "bytes");
	task_ledgers.reusable = ledger_entry_add(t, "reusable", "physmem", "bytes");
	task_ledgers.external = ledger_entry_add(t, "external", "physmem", "bytes");
	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(t, "purgeable_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(t, "purgeable_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(t, "purgeable_volatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(t, "purgeable_nonvolatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#if DEBUG || DEVELOPMENT
	/* page-grab counters (plus kern/iopl/upl breakdowns) exist only on dev/debug kernels */
	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#endif
	/* per-tag memory accounting (footprint/nofootprint, resident/compressed) */
	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(t, "tagged_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(t, "tagged_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(t, "tagged_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(t, "tagged_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile = ledger_entry_add_with_flags(t, "network_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(t, "network_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(t, "network_volatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(t, "network_nonvolatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(t, "media_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint = ledger_entry_add_with_flags(t, "media_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(t, "media_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(t, "media_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(t, "graphics_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(t, "graphics_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(t, "graphics_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(t, "graphics_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(t, "neural_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint = ledger_entry_add_with_flags(t, "neural_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(t, "neural_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(t, "neural_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

#if CONFIG_FREEZE
	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
#endif /* CONFIG_FREEZE */

	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");

#if CONFIG_SCHED_SFI
	/*
	 * SFI wait-time entries: several SFI classes may share one ledger
	 * entry (an "alias"); initialize all slots to -1 (unregistered),
	 * then register each class, reusing the alias's entry when the
	 * alias was already registered.
	 */
	sfi_class_id_t class_id, ledger_alias;
	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		task_ledgers.sfi_wait_times[class_id] = -1;
	}

	/* don't account for UNSPECIFIED */
	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
			/* Check to see if alias has been registered yet */
			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
			} else {
				/* Otherwise, initialize it first */
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
			}
		} else {
			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
		}

		if (task_ledgers.sfi_wait_times[class_id] < 0) {
			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
		}
	}

	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
#endif /* CONFIG_SCHED_SFI */

	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
	task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
#if CONFIG_PHYS_WRITE_ACCT
	task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
#endif /* CONFIG_PHYS_WRITE_ACCT */
	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");

#if CONFIG_MEMORYSTATUS
	task_ledgers.memorystatus_dirty_time = ledger_entry_add(t, "memorystatus_dirty_time", "physmem", "ns");
#endif /* CONFIG_MEMORYSTATUS */

	task_ledgers.swapins = ledger_entry_add_with_flags(t, "swapins", "physmem", "bytes",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

	/*
	 * Any negative index above means a registration failed; the template
	 * is unusable, so panic at boot rather than limp along.
	 */
	if ((task_ledgers.cpu_time < 0) ||
	    (task_ledgers.tkm_private < 0) ||
	    (task_ledgers.tkm_shared < 0) ||
	    (task_ledgers.phys_mem < 0) ||
	    (task_ledgers.wired_mem < 0) ||
	    (task_ledgers.internal < 0) ||
	    (task_ledgers.external < 0) ||
	    (task_ledgers.reusable < 0) ||
	    (task_ledgers.iokit_mapped < 0) ||
	    (task_ledgers.alternate_accounting < 0) ||
	    (task_ledgers.alternate_accounting_compressed < 0) ||
	    (task_ledgers.page_table < 0) ||
	    (task_ledgers.phys_footprint < 0) ||
	    (task_ledgers.internal_compressed < 0) ||
	    (task_ledgers.purgeable_volatile < 0) ||
	    (task_ledgers.purgeable_nonvolatile < 0) ||
	    (task_ledgers.purgeable_volatile_compressed < 0) ||
	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
	    (task_ledgers.tagged_nofootprint < 0) ||
	    (task_ledgers.tagged_footprint < 0) ||
	    (task_ledgers.tagged_nofootprint_compressed < 0) ||
	    (task_ledgers.tagged_footprint_compressed < 0) ||
#if CONFIG_FREEZE
	    (task_ledgers.frozen_to_swap < 0) ||
#endif /* CONFIG_FREEZE */
	    (task_ledgers.network_volatile < 0) ||
	    (task_ledgers.network_nonvolatile < 0) ||
	    (task_ledgers.network_volatile_compressed < 0) ||
	    (task_ledgers.network_nonvolatile_compressed < 0) ||
	    (task_ledgers.media_nofootprint < 0) ||
	    (task_ledgers.media_footprint < 0) ||
	    (task_ledgers.media_nofootprint_compressed < 0) ||
	    (task_ledgers.media_footprint_compressed < 0) ||
	    (task_ledgers.graphics_nofootprint < 0) ||
	    (task_ledgers.graphics_footprint < 0) ||
	    (task_ledgers.graphics_nofootprint_compressed < 0) ||
	    (task_ledgers.graphics_footprint_compressed < 0) ||
	    (task_ledgers.neural_nofootprint < 0) ||
	    (task_ledgers.neural_footprint < 0) ||
	    (task_ledgers.neural_nofootprint_compressed < 0) ||
	    (task_ledgers.neural_footprint_compressed < 0) ||
	    (task_ledgers.platform_idle_wakeups < 0) ||
	    (task_ledgers.interrupt_wakeups < 0) ||
	    (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
	    (task_ledgers.physical_writes < 0) ||
	    (task_ledgers.logical_writes < 0) ||
	    (task_ledgers.logical_writes_to_external < 0) ||
#if CONFIG_PHYS_WRITE_ACCT
	    (task_ledgers.fs_metadata_writes < 0) ||
#endif /* CONFIG_PHYS_WRITE_ACCT */
#if CONFIG_MEMORYSTATUS
	    (task_ledgers.memorystatus_dirty_time < 0) ||
#endif /* CONFIG_MEMORYSTATUS */
	    (task_ledgers.energy_billed_to_me < 0) ||
	    (task_ledgers.energy_billed_to_others < 0) ||
	    (task_ledgers.swapins < 0)
	    ) {
		panic("couldn't create entries for task ledger template");
	}

	/* These entries only ever accumulate credits; debits are not tracked. */
	ledger_track_credit_only(t, task_ledgers.phys_footprint);
	ledger_track_credit_only(t, task_ledgers.internal);
	ledger_track_credit_only(t, task_ledgers.external);
	ledger_track_credit_only(t, task_ledgers.reusable);

	/* Track high-water marks for these entries (60s interval argument). */
	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
	ledger_track_maximum(t, task_ledgers.phys_mem, 60);
	ledger_track_maximum(t, task_ledgers.internal, 60);
	ledger_track_maximum(t, task_ledgers.internal_compressed, 60);
	ledger_track_maximum(t, task_ledgers.reusable, 60);
	ledger_track_maximum(t, task_ledgers.external, 60);
#if MACH_ASSERT
	if (pmap_ledgers_panic) {
		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
		ledger_panic_on_negative(t, task_ledgers.page_table);
		ledger_panic_on_negative(t, task_ledgers.internal);
		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
#if CONFIG_PHYS_WRITE_ACCT
		ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
#endif /* CONFIG_PHYS_WRITE_ACCT */

		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_volatile);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.media_footprint);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
	}
#endif /* MACH_ASSERT */

	/* Limit-exceeded callbacks: footprint (jetsam), wakeups, physical writes. */
#if CONFIG_MEMORYSTATUS
	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
#endif /* CONFIG_MEMORYSTATUS */

	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
	    task_wakeups_rate_exceeded, NULL, NULL);
	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);

	/* Seal the template; no further entries may be added after this. */
#if XNU_MONITOR
	ledger_template_complete_secure_alloc(t);
#else /* XNU_MONITOR */
	ledger_template_complete(t);
#endif /* XNU_MONITOR */
	task_ledger_template = t;
}
1303 
1304 kern_return_t
task_create_internal(task_t parent_task,proc_ro_t proc_ro,coalition_t * parent_coalitions __unused,boolean_t inherit_memory,boolean_t is_64bit __unused,boolean_t is_64bit_data,uint32_t t_flags,uint32_t t_procflags,uint8_t t_returnwaitflags,task_t * child_task)1305 task_create_internal(
1306 	task_t             parent_task,            /* Null-able */
1307 	proc_ro_t          proc_ro,
1308 	coalition_t        *parent_coalitions __unused,
1309 	boolean_t          inherit_memory,
1310 	boolean_t          is_64bit __unused,
1311 	boolean_t          is_64bit_data,
1312 	uint32_t           t_flags,
1313 	uint32_t           t_procflags,
1314 	uint8_t            t_returnwaitflags,
1315 	task_t             *child_task)            /* OUT */
1316 {
1317 	task_t                  new_task;
1318 	vm_shared_region_t      shared_region;
1319 	ledger_t                ledger = NULL;
1320 	struct task_ro_data     task_ro_data = {};
1321 
1322 	*child_task = NULL;
1323 	new_task = zalloc_flags(task_zone, Z_WAITOK | Z_NOFAIL);
1324 
1325 	if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1326 		zfree(task_zone, new_task);
1327 		return KERN_RESOURCE_SHORTAGE;
1328 	}
1329 
1330 	/* allocate with active entries */
1331 	assert(task_ledger_template != NULL);
1332 	ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1333 	if (ledger == NULL) {
1334 		task_ref_count_fini(new_task);
1335 		zfree(task_zone, new_task);
1336 		return KERN_RESOURCE_SHORTAGE;
1337 	}
1338 
1339 	counter_alloc(&(new_task->faults));
1340 
1341 #if defined(HAS_APPLE_PAC)
1342 	ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1343 	ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
1344 	ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1345 #endif
1346 
1347 
1348 	new_task->ledger = ledger;
1349 
1350 	/* if inherit_memory is true, parent_task MUST not be NULL */
1351 	if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1352 		new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1353 	} else {
1354 		unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1355 		pmap_t pmap = pmap_create_options(ledger, 0, pmap_flags);
1356 		if (pmap == NULL) {
1357 			counter_free(&new_task->faults);
1358 			ledger_dereference(ledger);
1359 			task_ref_count_fini(new_task);
1360 			zfree(task_zone, new_task);
1361 			return KERN_RESOURCE_SHORTAGE;
1362 		}
1363 		new_task->map = vm_map_create(pmap,
1364 		    (vm_map_offset_t)(VM_MIN_ADDRESS),
1365 		    (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1366 		if (new_task->map == NULL) {
1367 			pmap_destroy(pmap);
1368 		}
1369 	}
1370 
1371 	if (new_task->map == NULL) {
1372 		counter_free(&new_task->faults);
1373 		ledger_dereference(ledger);
1374 		task_ref_count_fini(new_task);
1375 		zfree(task_zone, new_task);
1376 		return KERN_RESOURCE_SHORTAGE;
1377 	}
1378 
1379 #if defined(CONFIG_SCHED_MULTIQ)
1380 	new_task->sched_group = sched_group_create();
1381 #endif
1382 
1383 	/* Inherit address space and memlock limit from parent */
1384 	if (parent_task) {
1385 		vm_map_set_size_limit(new_task->map, parent_task->map->size_limit);
1386 		vm_map_set_data_limit(new_task->map, parent_task->map->data_limit);
1387 		vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1388 	}
1389 
1390 	lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1391 	queue_init(&new_task->threads);
1392 	new_task->suspend_count = 0;
1393 	new_task->thread_count = 0;
1394 	new_task->active_thread_count = 0;
1395 	new_task->user_stop_count = 0;
1396 	new_task->legacy_stop_count = 0;
1397 	new_task->active = TRUE;
1398 	new_task->halting = FALSE;
1399 	new_task->priv_flags = 0;
1400 	new_task->t_flags = t_flags;
1401 	new_task->t_procflags = t_procflags;
1402 	new_task->t_returnwaitflags = t_returnwaitflags;
1403 	new_task->returnwait_inheritor = current_thread();
1404 	new_task->importance = 0;
1405 	new_task->crashed_thread_id = 0;
1406 	new_task->exec_token = 0;
1407 	new_task->watchports = NULL;
1408 	new_task->restartable_ranges = NULL;
1409 
1410 	new_task->bank_context = NULL;
1411 
1412 #ifdef MACH_BSD
1413 	new_task->bsd_info = NULL;
1414 	new_task->corpse_info = NULL;
1415 #endif /* MACH_BSD */
1416 
1417 	/* kern_task not created by this function has unique id 0, start with 1 here. */
1418 	task_set_uniqueid(new_task);
1419 
1420 #if CONFIG_MACF
1421 	set_task_crash_label(new_task, NULL);
1422 
1423 	task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1424 	task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1425 #endif
1426 
1427 #if CONFIG_MEMORYSTATUS
1428 	if (max_task_footprint != 0) {
1429 		ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1430 	}
1431 #endif /* CONFIG_MEMORYSTATUS */
1432 
1433 	if (task_wakeups_monitor_rate != 0) {
1434 		uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1435 		int32_t  rate;        // Ignored because of WAKEMON_SET_DEFAULTS
1436 		task_wakeups_monitor_ctl(new_task, &flags, &rate);
1437 	}
1438 
1439 #if CONFIG_IO_ACCOUNTING
1440 	uint32_t flags = IOMON_ENABLE;
1441 	task_io_monitor_ctl(new_task, &flags);
1442 #endif /* CONFIG_IO_ACCOUNTING */
1443 
1444 	machine_task_init(new_task, parent_task, inherit_memory);
1445 
1446 	new_task->task_debug = NULL;
1447 
1448 #if DEVELOPMENT || DEBUG
1449 	new_task->task_unnested = FALSE;
1450 	new_task->task_disconnected_count = 0;
1451 #endif
1452 	queue_init(&new_task->semaphore_list);
1453 	new_task->semaphores_owned = 0;
1454 
1455 	ipc_task_init(new_task, parent_task);
1456 
1457 	new_task->vtimers = 0;
1458 
1459 	new_task->shared_region = NULL;
1460 
1461 	new_task->affinity_space = NULL;
1462 
1463 	new_task->t_kpc = 0;
1464 
1465 	new_task->pidsuspended = FALSE;
1466 	new_task->frozen = FALSE;
1467 	new_task->changing_freeze_state = FALSE;
1468 	new_task->rusage_cpu_flags = 0;
1469 	new_task->rusage_cpu_percentage = 0;
1470 	new_task->rusage_cpu_interval = 0;
1471 	new_task->rusage_cpu_deadline = 0;
1472 	new_task->rusage_cpu_callt = NULL;
1473 #if MACH_ASSERT
1474 	new_task->suspends_outstanding = 0;
1475 #endif
1476 
1477 #if HYPERVISOR
1478 	new_task->hv_task_target = NULL;
1479 #endif /* HYPERVISOR */
1480 
1481 #if CONFIG_TASKWATCH
1482 	queue_init(&new_task->task_watchers);
1483 	new_task->num_taskwatchers  = 0;
1484 	new_task->watchapplying  = 0;
1485 #endif /* CONFIG_TASKWATCH */
1486 
1487 	new_task->mem_notify_reserved = 0;
1488 	new_task->memlimit_attrs_reserved = 0;
1489 
1490 	new_task->requested_policy = default_task_requested_policy;
1491 	new_task->effective_policy = default_task_effective_policy;
1492 
1493 	new_task->task_shared_region_slide = -1;
1494 
1495 	if (parent_task != NULL) {
1496 		task_ro_data.task_tokens.sec_token = *task_get_sec_token(parent_task);
1497 		task_ro_data.task_tokens.audit_token = *task_get_audit_token(parent_task);
1498 	} else {
1499 		task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1500 		task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1501 	}
1502 
1503 	/* must set before task_importance_init_from_parent: */
1504 	if (proc_ro != NULL) {
1505 		new_task->bsd_info_ro = proc_ro_ref_task(proc_ro, new_task, &task_ro_data);
1506 	} else {
1507 		new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, new_task, &task_ro_data);
1508 	}
1509 
1510 	task_importance_init_from_parent(new_task, parent_task);
1511 
1512 	new_task->corpse_vmobject_list = NULL;
1513 
1514 	if (parent_task != TASK_NULL) {
1515 		/* inherit the parent's shared region */
1516 		shared_region = vm_shared_region_get(parent_task);
1517 		vm_shared_region_set(new_task, shared_region);
1518 
1519 #if __has_feature(ptrauth_calls)
1520 		/* use parent's shared_region_id */
1521 		char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1522 		if (shared_region_id != NULL) {
1523 			shared_region_key_alloc(shared_region_id, FALSE, 0);         /* get a reference */
1524 		}
1525 		task_set_shared_region_id(new_task, shared_region_id);
1526 #endif /* __has_feature(ptrauth_calls) */
1527 
1528 		if (task_has_64Bit_addr(parent_task)) {
1529 			task_set_64Bit_addr(new_task);
1530 		}
1531 
1532 		if (task_has_64Bit_data(parent_task)) {
1533 			task_set_64Bit_data(new_task);
1534 		}
1535 
1536 		new_task->all_image_info_addr = parent_task->all_image_info_addr;
1537 		new_task->all_image_info_size = parent_task->all_image_info_size;
1538 		new_task->mach_header_vm_address = 0;
1539 
1540 		if (inherit_memory && parent_task->affinity_space) {
1541 			task_affinity_create(parent_task, new_task);
1542 		}
1543 
1544 		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1545 
1546 		new_task->task_exc_guard = parent_task->task_exc_guard;
1547 		/* only inherit the option bits, no effect until task_set_immovable_pinned() */
1548 		new_task->task_control_port_options = parent_task->task_control_port_options;
1549 
1550 		if (parent_task->t_flags & TF_NO_SMT) {
1551 			new_task->t_flags |= TF_NO_SMT;
1552 		}
1553 
1554 		if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1555 			new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1556 		}
1557 
1558 		if (parent_task->t_flags & TF_TECS) {
1559 			new_task->t_flags |= TF_TECS;
1560 		}
1561 
1562 		if (parent_task->t_flags & TF_FILTER_MSG) {
1563 			new_task->t_flags |= TF_FILTER_MSG;
1564 		}
1565 
1566 #if defined(__x86_64__)
1567 		if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1568 			new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1569 		}
1570 #endif
1571 		new_task->priority = BASEPRI_DEFAULT;
1572 		new_task->max_priority = MAXPRI_USER;
1573 
1574 		task_policy_create(new_task, parent_task);
1575 	} else {
1576 #ifdef __LP64__
1577 		if (is_64bit) {
1578 			task_set_64Bit_addr(new_task);
1579 		}
1580 #endif
1581 
1582 		if (is_64bit_data) {
1583 			task_set_64Bit_data(new_task);
1584 		}
1585 
1586 		new_task->all_image_info_addr = (mach_vm_address_t)0;
1587 		new_task->all_image_info_size = (mach_vm_size_t)0;
1588 
1589 		new_task->pset_hint = PROCESSOR_SET_NULL;
1590 
1591 		new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1592 		new_task->task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1593 
1594 		if (kernel_task == TASK_NULL) {
1595 			new_task->priority = BASEPRI_KERNEL;
1596 			new_task->max_priority = MAXPRI_KERNEL;
1597 		} else {
1598 			new_task->priority = BASEPRI_DEFAULT;
1599 			new_task->max_priority = MAXPRI_USER;
1600 		}
1601 	}
1602 
1603 	bzero(new_task->coalition, sizeof(new_task->coalition));
1604 	for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1605 		queue_chain_init(new_task->task_coalition[i]);
1606 	}
1607 
1608 	/* Allocate I/O Statistics */
1609 	new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1610 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1611 
1612 	bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1613 	bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1614 
1615 	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1616 
1617 	counter_alloc(&(new_task->pageins));
1618 	counter_alloc(&(new_task->cow_faults));
1619 	counter_alloc(&(new_task->messages_sent));
1620 	counter_alloc(&(new_task->messages_received));
1621 
1622 	/* Copy resource acc. info from Parent for Corpe Forked task. */
1623 	if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1624 		task_rollup_accounting_info(new_task, parent_task);
1625 		task_store_owned_vmobject_info(new_task, parent_task);
1626 	} else {
1627 		/* Initialize to zero for standard fork/spawn case */
1628 		new_task->total_user_time = 0;
1629 		new_task->total_system_time = 0;
1630 		new_task->total_ptime = 0;
1631 		new_task->total_runnable_time = 0;
1632 		new_task->syscalls_mach = 0;
1633 		new_task->syscalls_unix = 0;
1634 		new_task->c_switch = 0;
1635 		new_task->p_switch = 0;
1636 		new_task->ps_switch = 0;
1637 		new_task->decompressions = 0;
1638 		new_task->low_mem_notified_warn = 0;
1639 		new_task->low_mem_notified_critical = 0;
1640 		new_task->purged_memory_warn = 0;
1641 		new_task->purged_memory_critical = 0;
1642 		new_task->low_mem_privileged_listener = 0;
1643 		new_task->memlimit_is_active = 0;
1644 		new_task->memlimit_is_fatal = 0;
1645 		new_task->memlimit_active_exc_resource = 0;
1646 		new_task->memlimit_inactive_exc_resource = 0;
1647 		new_task->task_timer_wakeups_bin_1 = 0;
1648 		new_task->task_timer_wakeups_bin_2 = 0;
1649 		new_task->task_gpu_ns = 0;
1650 		new_task->task_writes_counters_internal.task_immediate_writes = 0;
1651 		new_task->task_writes_counters_internal.task_deferred_writes = 0;
1652 		new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1653 		new_task->task_writes_counters_internal.task_metadata_writes = 0;
1654 		new_task->task_writes_counters_external.task_immediate_writes = 0;
1655 		new_task->task_writes_counters_external.task_deferred_writes = 0;
1656 		new_task->task_writes_counters_external.task_invalidated_writes = 0;
1657 		new_task->task_writes_counters_external.task_metadata_writes = 0;
1658 #if CONFIG_PHYS_WRITE_ACCT
1659 		new_task->task_fs_metadata_writes = 0;
1660 #endif /* CONFIG_PHYS_WRITE_ACCT */
1661 
1662 		new_task->task_energy = 0;
1663 #if MONOTONIC
1664 		memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1665 #endif /* MONOTONIC */
1666 	}
1667 
1668 
1669 #if CONFIG_COALITIONS
1670 	if (!(t_flags & TF_CORPSE_FORK)) {
1671 		/* TODO: there is no graceful failure path here... */
1672 		if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1673 			coalitions_adopt_task(parent_coalitions, new_task);
1674 		} else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1675 			/*
1676 			 * all tasks at least have a resource coalition, so
1677 			 * if the parent has one then inherit all coalitions
1678 			 * the parent is a part of
1679 			 */
1680 			coalitions_adopt_task(parent_task->coalition, new_task);
1681 		} else {
1682 			/* TODO: assert that new_task will be PID 1 (launchd) */
1683 			coalitions_adopt_init_task(new_task);
1684 		}
1685 		/*
1686 		 * on exec, we need to transfer the coalition roles from the
1687 		 * parent task to the exec copy task.
1688 		 */
1689 		if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1690 			int coal_roles[COALITION_NUM_TYPES];
1691 			task_coalition_roles(parent_task, coal_roles);
1692 			(void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1693 		}
1694 	} else {
1695 		coalitions_adopt_corpse_task(new_task);
1696 	}
1697 
1698 	if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1699 		panic("created task is not a member of a resource coalition");
1700 	}
1701 	task_set_coalition_member(new_task);
1702 #endif /* CONFIG_COALITIONS */
1703 
1704 	new_task->dispatchqueue_offset = 0;
1705 	if (parent_task != NULL) {
1706 		new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1707 	}
1708 
1709 	new_task->task_can_transfer_memory_ownership = FALSE;
1710 	new_task->task_volatile_objects = 0;
1711 	new_task->task_nonvolatile_objects = 0;
1712 	new_task->task_objects_disowning = FALSE;
1713 	new_task->task_objects_disowned = FALSE;
1714 	new_task->task_owned_objects = 0;
1715 	queue_init(&new_task->task_objq);
1716 
1717 #if CONFIG_FREEZE
1718 	queue_init(&new_task->task_frozen_cseg_q);
1719 #endif /* CONFIG_FREEZE */
1720 
1721 	task_objq_lock_init(new_task);
1722 
1723 #if __arm64__
1724 	new_task->task_legacy_footprint = FALSE;
1725 	new_task->task_extra_footprint_limit = FALSE;
1726 	new_task->task_ios13extended_footprint_limit = FALSE;
1727 #endif /* __arm64__ */
1728 	new_task->task_region_footprint = FALSE;
1729 	new_task->task_has_crossed_thread_limit = FALSE;
1730 	new_task->task_thread_limit = 0;
1731 #if CONFIG_SECLUDED_MEMORY
1732 	new_task->task_can_use_secluded_mem = FALSE;
1733 	new_task->task_could_use_secluded_mem = FALSE;
1734 	new_task->task_could_also_use_secluded_mem = FALSE;
1735 	new_task->task_suppressed_secluded = FALSE;
1736 #endif /* CONFIG_SECLUDED_MEMORY */
1737 
1738 	/*
1739 	 * t_flags is set up above. But since we don't
1740 	 * support darkwake mode being set that way
1741 	 * currently, we clear it out here explicitly.
1742 	 */
1743 	new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1744 
1745 	queue_init(&new_task->io_user_clients);
1746 	new_task->loadTag = 0;
1747 
1748 	ipc_task_enable(new_task);
1749 
1750 	lck_mtx_lock(&tasks_threads_lock);
1751 	queue_enter(&tasks, new_task, task_t, tasks);
1752 	tasks_count++;
1753 	if (tasks_suspend_state) {
1754 		task_suspend_internal(new_task);
1755 	}
1756 	lck_mtx_unlock(&tasks_threads_lock);
1757 
1758 	*child_task = new_task;
1759 	return KERN_SUCCESS;
1760 }
1761 
1762 /*
1763  *	task_rollup_accounting_info
1764  *
1765  *	Roll up accounting stats. Used to rollup stats
1766  *	for exec copy task and corpse fork.
1767  */
1768 void
task_rollup_accounting_info(task_t to_task,task_t from_task)1769 task_rollup_accounting_info(task_t to_task, task_t from_task)
1770 {
1771 	assert(from_task != to_task);
1772 
1773 	to_task->total_user_time = from_task->total_user_time;
1774 	to_task->total_system_time = from_task->total_system_time;
1775 	to_task->total_ptime = from_task->total_ptime;
1776 	to_task->total_runnable_time = from_task->total_runnable_time;
1777 	counter_add(&to_task->faults, counter_load(&from_task->faults));
1778 	counter_add(&to_task->pageins, counter_load(&from_task->pageins));
1779 	counter_add(&to_task->cow_faults, counter_load(&from_task->cow_faults));
1780 	counter_add(&to_task->messages_sent, counter_load(&from_task->messages_sent));
1781 	counter_add(&to_task->messages_received, counter_load(&from_task->messages_received));
1782 	to_task->decompressions = from_task->decompressions;
1783 	to_task->syscalls_mach = from_task->syscalls_mach;
1784 	to_task->syscalls_unix = from_task->syscalls_unix;
1785 	to_task->c_switch = from_task->c_switch;
1786 	to_task->p_switch = from_task->p_switch;
1787 	to_task->ps_switch = from_task->ps_switch;
1788 	to_task->extmod_statistics = from_task->extmod_statistics;
1789 	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1790 	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1791 	to_task->purged_memory_warn = from_task->purged_memory_warn;
1792 	to_task->purged_memory_critical = from_task->purged_memory_critical;
1793 	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1794 	*to_task->task_io_stats = *from_task->task_io_stats;
1795 	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1796 	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1797 	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1798 	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1799 	to_task->task_gpu_ns = from_task->task_gpu_ns;
1800 	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1801 	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1802 	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1803 	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1804 	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1805 	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1806 	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1807 	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1808 #if CONFIG_PHYS_WRITE_ACCT
1809 	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
1810 #endif /* CONFIG_PHYS_WRITE_ACCT */
1811 	to_task->task_energy = from_task->task_energy;
1812 
1813 #if CONFIG_MEMORYSTATUS
1814 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.memorystatus_dirty_time);
1815 #endif /* CONFIG_MEMORYSTATUS */
1816 
1817 	/* Skip ledger roll up for memory accounting entries */
1818 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1819 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1820 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1821 #if CONFIG_SCHED_SFI
1822 	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1823 		ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1824 	}
1825 #endif
1826 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1827 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1828 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1829 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1830 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1831 	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1832 }
1833 
1834 /*
1835  *	task_deallocate_internal:
1836  *
1837  *	Drop a reference on a task.
1838  *	Don't call this directly.
1839  */
1840 extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
1841 void
task_deallocate_internal(task_t task,os_ref_count_t refs)1842 task_deallocate_internal(
1843 	task_t          task,
1844 	os_ref_count_t  refs)
1845 {
1846 	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1847 
1848 	if (task == TASK_NULL) {
1849 		return;
1850 	}
1851 
1852 #if IMPORTANCE_INHERITANCE
1853 	if (refs == 1) {
1854 		/*
1855 		 * If last ref potentially comes from the task's importance,
1856 		 * disconnect it.  But more task refs may be added before
1857 		 * that completes, so wait for the reference to go to zero
1858 		 * naturally (it may happen on a recursive task_deallocate()
1859 		 * from the ipc_importance_disconnect_task() call).
1860 		 */
1861 		if (IIT_NULL != task->task_imp_base) {
1862 			ipc_importance_disconnect_task(task);
1863 		}
1864 		return;
1865 	}
1866 #endif /* IMPORTANCE_INHERITANCE */
1867 
1868 	if (refs > 0) {
1869 		return;
1870 	}
1871 
1872 	/*
1873 	 * The task should be dead at this point. Ensure other resources
1874 	 * like threads, are gone before we trash the world.
1875 	 */
1876 	assert(queue_empty(&task->threads));
1877 	assert(task->bsd_info == NULL);
1878 	assert(!is_active(task->itk_space));
1879 	assert(!task->active);
1880 	assert(task->active_thread_count == 0);
1881 
1882 	lck_mtx_lock(&tasks_threads_lock);
1883 	assert(terminated_tasks_count > 0);
1884 	queue_remove(&terminated_tasks, task, task_t, tasks);
1885 	terminated_tasks_count--;
1886 	lck_mtx_unlock(&tasks_threads_lock);
1887 
1888 	/*
1889 	 * remove the reference on bank context
1890 	 */
1891 	task_bank_reset(task);
1892 
1893 	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));
1894 
1895 	/*
1896 	 *	Give the machine dependent code a chance
1897 	 *	to perform cleanup before ripping apart
1898 	 *	the task.
1899 	 */
1900 	machine_task_terminate(task);
1901 
1902 	ipc_task_terminate(task);
1903 
1904 	/* let iokit know */
1905 	iokit_task_terminate(task);
1906 
1907 	if (task->affinity_space) {
1908 		task_affinity_deallocate(task);
1909 	}
1910 
1911 #if MACH_ASSERT
1912 	if (task->ledger != NULL &&
1913 	    task->map != NULL &&
1914 	    task->map->pmap != NULL &&
1915 	    task->map->pmap->ledger != NULL) {
1916 		assert(task->ledger == task->map->pmap->ledger);
1917 	}
1918 #endif /* MACH_ASSERT */
1919 
1920 	vm_owned_objects_disown(task);
1921 	assert(task->task_objects_disowned);
1922 	if (task->task_owned_objects != 0) {
1923 		panic("task_deallocate(%p): "
1924 		    "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1925 		    task,
1926 		    task->task_volatile_objects,
1927 		    task->task_nonvolatile_objects,
1928 		    task->task_owned_objects);
1929 	}
1930 
1931 	vm_map_deallocate(task->map);
1932 	is_release(task->itk_space);
1933 	if (task->restartable_ranges) {
1934 		restartable_ranges_release(task->restartable_ranges);
1935 	}
1936 
1937 	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1938 	    &interrupt_wakeups, &debit);
1939 	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1940 	    &platform_idle_wakeups, &debit);
1941 
1942 #if defined(CONFIG_SCHED_MULTIQ)
1943 	sched_group_destroy(task->sched_group);
1944 #endif
1945 
1946 	/* Accumulate statistics for dead tasks */
1947 	lck_spin_lock(&dead_task_statistics_lock);
1948 	dead_task_statistics.total_user_time += task->total_user_time;
1949 	dead_task_statistics.total_system_time += task->total_system_time;
1950 
1951 	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1952 	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1953 
1954 	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1955 	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1956 	dead_task_statistics.total_ptime += task->total_ptime;
1957 	dead_task_statistics.total_pset_switches += task->ps_switch;
1958 	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1959 	dead_task_statistics.task_energy += task->task_energy;
1960 
1961 	lck_spin_unlock(&dead_task_statistics_lock);
1962 	lck_mtx_destroy(&task->lock, &task_lck_grp);
1963 
1964 	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1965 	    &debit)) {
1966 		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1967 		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1968 	}
1969 	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1970 	    &debit)) {
1971 		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1972 		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1973 	}
1974 	ledger_dereference(task->ledger);
1975 
1976 	counter_free(&task->faults);
1977 	counter_free(&task->pageins);
1978 	counter_free(&task->cow_faults);
1979 	counter_free(&task->messages_sent);
1980 	counter_free(&task->messages_received);
1981 
1982 #if CONFIG_COALITIONS
1983 	task_release_coalitions(task);
1984 #endif /* CONFIG_COALITIONS */
1985 
1986 	bzero(task->coalition, sizeof(task->coalition));
1987 
1988 #if MACH_BSD
1989 	/* clean up collected information since last reference to task is gone */
1990 	if (task->corpse_info) {
1991 		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1992 		task_crashinfo_destroy(task->corpse_info);
1993 		task->corpse_info = NULL;
1994 		kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1995 	}
1996 #endif
1997 
1998 #if CONFIG_MACF
1999 	if (get_task_crash_label(task)) {
2000 		mac_exc_free_label(get_task_crash_label(task));
2001 		set_task_crash_label(task, NULL);
2002 	}
2003 #endif
2004 
2005 	assert(queue_empty(&task->task_objq));
2006 	task_objq_lock_destroy(task);
2007 
2008 	if (task->corpse_vmobject_list) {
2009 		kfree_data(task->corpse_vmobject_list,
2010 		    (vm_size_t)task->corpse_vmobject_list_size);
2011 	}
2012 
2013 	task_ref_count_fini(task);
2014 
2015 	task->bsd_info_ro = proc_ro_release_task((proc_ro_t)task->bsd_info_ro);
2016 
2017 	if (task->bsd_info_ro != NULL) {
2018 		proc_ro_free(task->bsd_info_ro);
2019 		task->bsd_info_ro = NULL;
2020 	}
2021 
2022 	zfree(task_zone, task);
2023 }
2024 
2025 /*
2026  *	task_name_deallocate_mig:
2027  *
2028  *	Drop a reference on a task name.
2029  */
2030 void
task_name_deallocate_mig(task_name_t task_name)2031 task_name_deallocate_mig(
2032 	task_name_t             task_name)
2033 {
2034 	return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2035 }
2036 
2037 /*
2038  *	task_policy_set_deallocate_mig:
2039  *
2040  *	Drop a reference on a task type.
2041  */
2042 void
task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)2043 task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2044 {
2045 	return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2046 }
2047 
2048 /*
2049  *	task_policy_get_deallocate_mig:
2050  *
2051  *	Drop a reference on a task type.
2052  */
2053 void
task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)2054 task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2055 {
2056 	return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2057 }
2058 
2059 /*
2060  *	task_inspect_deallocate_mig:
2061  *
2062  *	Drop a task inspection reference.
2063  */
2064 void
task_inspect_deallocate_mig(task_inspect_t task_inspect)2065 task_inspect_deallocate_mig(
2066 	task_inspect_t          task_inspect)
2067 {
2068 	return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2069 }
2070 
2071 /*
2072  *	task_read_deallocate_mig:
2073  *
2074  *	Drop a reference on task read port.
2075  */
2076 void
task_read_deallocate_mig(task_read_t task_read)2077 task_read_deallocate_mig(
2078 	task_read_t          task_read)
2079 {
2080 	return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2081 }
2082 
2083 /*
2084  *	task_suspension_token_deallocate:
2085  *
2086  *	Drop a reference on a task suspension token.
2087  */
2088 void
task_suspension_token_deallocate(task_suspension_token_t token)2089 task_suspension_token_deallocate(
2090 	task_suspension_token_t         token)
2091 {
2092 	return task_deallocate((task_t)token);
2093 }
2094 
2095 void
task_suspension_token_deallocate_grp(task_suspension_token_t token,task_grp_t grp)2096 task_suspension_token_deallocate_grp(
2097 	task_suspension_token_t         token,
2098 	task_grp_t                      grp)
2099 {
2100 	return task_deallocate_grp((task_t)token, grp);
2101 }
2102 
2103 /*
2104  * task_collect_crash_info:
2105  *
2106  * collect crash info from bsd and mach based data
2107  */
2108 kern_return_t
task_collect_crash_info(task_t task,struct label * crash_label,int is_corpse_fork)2109 task_collect_crash_info(
2110 	task_t task,
2111 #ifdef CONFIG_MACF
2112 	struct label *crash_label,
2113 #endif
2114 	int is_corpse_fork)
2115 {
2116 	kern_return_t kr = KERN_SUCCESS;
2117 
2118 	kcdata_descriptor_t crash_data = NULL;
2119 	kcdata_descriptor_t crash_data_release = NULL;
2120 	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2121 	mach_vm_offset_t crash_data_ptr = 0;
2122 	void *crash_data_kernel = NULL;
2123 	void *crash_data_kernel_release = NULL;
2124 #if CONFIG_MACF
2125 	struct label *label, *free_label;
2126 #endif
2127 
2128 	if (!corpses_enabled()) {
2129 		return KERN_NOT_SUPPORTED;
2130 	}
2131 
2132 #if CONFIG_MACF
2133 	free_label = label = mac_exc_create_label(NULL);
2134 #endif
2135 
2136 	task_lock(task);
2137 
2138 	assert(is_corpse_fork || task->bsd_info != NULL);
2139 	if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2140 #if CONFIG_MACF
2141 		/* Set the crash label, used by the exception delivery mac hook */
2142 		free_label = get_task_crash_label(task);         // Most likely NULL.
2143 		set_task_crash_label(task, label);
2144 		mac_exc_update_task_crash_label(task, crash_label);
2145 #endif
2146 		task_unlock(task);
2147 
2148 		crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2149 		    Z_WAITOK | Z_ZERO);
2150 		if (crash_data_kernel == NULL) {
2151 			kr = KERN_RESOURCE_SHORTAGE;
2152 			goto out_no_lock;
2153 		}
2154 		crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2155 
2156 		/* Do not get a corpse ref for corpse fork */
2157 		crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2158 		    is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2159 		    KCFLAG_USE_MEMCOPY);
2160 		if (crash_data) {
2161 			task_lock(task);
2162 			crash_data_release = task->corpse_info;
2163 			crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2164 			task->corpse_info = crash_data;
2165 
2166 			task_unlock(task);
2167 			kr = KERN_SUCCESS;
2168 		} else {
2169 			kfree_data(crash_data_kernel,
2170 			    CORPSEINFO_ALLOCATION_SIZE);
2171 			kr = KERN_FAILURE;
2172 		}
2173 
2174 		if (crash_data_release != NULL) {
2175 			task_crashinfo_destroy(crash_data_release);
2176 		}
2177 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2178 	} else {
2179 		task_unlock(task);
2180 	}
2181 
2182 out_no_lock:
2183 #if CONFIG_MACF
2184 	if (free_label != NULL) {
2185 		mac_exc_free_label(free_label);
2186 	}
2187 #endif
2188 	return kr;
2189 }
2190 
2191 /*
2192  * task_deliver_crash_notification:
2193  *
2194  * Makes outcall to registered host port for a corpse.
2195  */
2196 kern_return_t
task_deliver_crash_notification(task_t corpse,thread_t thread,exception_type_t etype,mach_exception_subcode_t subcode)2197 task_deliver_crash_notification(
2198 	task_t corpse, /* corpse or corpse fork */
2199 	thread_t thread,
2200 	exception_type_t etype,
2201 	mach_exception_subcode_t subcode)
2202 {
2203 	kcdata_descriptor_t crash_info = corpse->corpse_info;
2204 	thread_t th_iter = NULL;
2205 	kern_return_t kr = KERN_SUCCESS;
2206 	wait_interrupt_t wsave;
2207 	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2208 	ipc_port_t corpse_port;
2209 
2210 	if (crash_info == NULL) {
2211 		return KERN_FAILURE;
2212 	}
2213 
2214 	assert(task_is_a_corpse(corpse));
2215 
2216 	task_lock(corpse);
2217 
2218 	/*
2219 	 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
2220 	 * Crash reporters should derive whether it's fatal from corpse blob.
2221 	 */
2222 	code[0] = etype;
2223 	code[1] = subcode;
2224 
2225 	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
2226 	{
2227 		if (th_iter->corpse_dup == FALSE) {
2228 			ipc_thread_reset(th_iter);
2229 		}
2230 	}
2231 	task_unlock(corpse);
2232 
2233 	/* Arm the no-sender notification for taskport */
2234 	task_reference(corpse);
2235 	corpse_port = convert_corpse_to_port_and_nsrequest(corpse);
2236 
2237 	wsave = thread_interrupt_level(THREAD_UNINT);
2238 	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2239 	if (kr != KERN_SUCCESS) {
2240 		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(corpse));
2241 	}
2242 
2243 	(void)thread_interrupt_level(wsave);
2244 
2245 	/*
2246 	 * Drop the send right on corpse port, will fire the
2247 	 * no-sender notification if exception deliver failed.
2248 	 */
2249 	ipc_port_release_send(corpse_port);
2250 	return kr;
2251 }
2252 
2253 /*
2254  *	task_terminate:
2255  *
2256  *	Terminate the specified task.  See comments on thread_terminate
2257  *	(kern/thread.c) about problems with terminating the "current task."
2258  */
2259 
2260 kern_return_t
task_terminate(task_t task)2261 task_terminate(
2262 	task_t          task)
2263 {
2264 	if (task == TASK_NULL) {
2265 		return KERN_INVALID_ARGUMENT;
2266 	}
2267 
2268 	if (task->bsd_info) {
2269 		return KERN_FAILURE;
2270 	}
2271 
2272 	return task_terminate_internal(task);
2273 }
2274 
2275 #if MACH_ASSERT
2276 extern int proc_pid(struct proc *);
2277 extern void proc_name_kdp(struct proc *p, char *buf, int size);
2278 #endif /* MACH_ASSERT */
2279 
2280 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
2281 static void
task_partial_reap(task_t task,__unused int pid)2282 __unused task_partial_reap(task_t task, __unused int pid)
2283 {
2284 	unsigned int    reclaimed_resident = 0;
2285 	unsigned int    reclaimed_compressed = 0;
2286 	uint64_t        task_page_count;
2287 
2288 	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2289 
2290 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2291 	    pid, task_page_count, 0, 0, 0);
2292 
2293 	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2294 
2295 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2296 	    pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2297 }
2298 
2299 /*
2300  * task_mark_corpse:
2301  *
2302  * Mark the task as a corpse. Called by crashing thread.
2303  */
2304 kern_return_t
task_mark_corpse(task_t task)2305 task_mark_corpse(task_t task)
2306 {
2307 	kern_return_t kr = KERN_SUCCESS;
2308 	thread_t self_thread;
2309 	(void) self_thread;
2310 	wait_interrupt_t wsave;
2311 #if CONFIG_MACF
2312 	struct label *crash_label = NULL;
2313 #endif
2314 
2315 	assert(task != kernel_task);
2316 	assert(task == current_task());
2317 	assert(!task_is_a_corpse(task));
2318 
2319 #if CONFIG_MACF
2320 	crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2321 #endif
2322 
2323 	kr = task_collect_crash_info(task,
2324 #if CONFIG_MACF
2325 	    crash_label,
2326 #endif
2327 	    FALSE);
2328 	if (kr != KERN_SUCCESS) {
2329 		goto out;
2330 	}
2331 
2332 	self_thread = current_thread();
2333 
2334 	wsave = thread_interrupt_level(THREAD_UNINT);
2335 	task_lock(task);
2336 
2337 	/*
2338 	 * Check if any other thread called task_terminate_internal
2339 	 * and made the task inactive before we could mark it for
2340 	 * corpse pending report. Bail out if the task is inactive.
2341 	 */
2342 	if (!task->active) {
2343 		kcdata_descriptor_t crash_data_release = task->corpse_info;;
2344 		void *crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);;
2345 
2346 		task->corpse_info = NULL;
2347 		task_unlock(task);
2348 
2349 		if (crash_data_release != NULL) {
2350 			task_crashinfo_destroy(crash_data_release);
2351 		}
2352 		kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2353 		return KERN_TERMINATED;
2354 	}
2355 
2356 	task_set_corpse_pending_report(task);
2357 	task_set_corpse(task);
2358 	task->crashed_thread_id = thread_tid(self_thread);
2359 
2360 	kr = task_start_halt_locked(task, TRUE);
2361 	assert(kr == KERN_SUCCESS);
2362 
2363 	task_set_uniqueid(task);
2364 
2365 	task_unlock(task);
2366 
2367 	/*
2368 	 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2369 	 * disable old ports here instead.
2370 	 */
2371 	ipc_task_disable(task);
2372 
2373 	/* terminate the ipc space */
2374 	ipc_space_terminate(task->itk_space);
2375 
2376 	/* Add it to global corpse task list */
2377 	task_add_to_corpse_task_list(task);
2378 
2379 	thread_terminate_internal(self_thread);
2380 
2381 	(void) thread_interrupt_level(wsave);
2382 	assert(task->halting == TRUE);
2383 
2384 out:
2385 #if CONFIG_MACF
2386 	mac_exc_free_label(crash_label);
2387 #endif
2388 	return kr;
2389 }
2390 
2391 /*
2392  *	task_set_uniqueid
2393  *
2394  *	Set task uniqueid to systemwide unique 64 bit value
2395  */
2396 void
task_set_uniqueid(task_t task)2397 task_set_uniqueid(task_t task)
2398 {
2399 	task->task_uniqueid = OSIncrementAtomic64(&next_taskuniqueid);
2400 }
2401 
2402 /*
2403  *	task_clear_corpse
2404  *
2405  *	Clears the corpse pending bit on task.
2406  *	Removes inspection bit on the threads.
2407  */
2408 void
task_clear_corpse(task_t task)2409 task_clear_corpse(task_t task)
2410 {
2411 	thread_t th_iter = NULL;
2412 
2413 	task_lock(task);
2414 	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2415 	{
2416 		thread_mtx_lock(th_iter);
2417 		th_iter->inspection = FALSE;
2418 		ipc_thread_disable(th_iter);
2419 		thread_mtx_unlock(th_iter);
2420 	}
2421 
2422 	thread_terminate_crashed_threads();
2423 	/* remove the pending corpse report flag */
2424 	task_clear_corpse_pending_report(task);
2425 
2426 	task_unlock(task);
2427 }
2428 
2429 /*
2430  *	task_port_no_senders
2431  *
2432  *	Called whenever the Mach port system detects no-senders on
2433  *	the task port of a corpse.
2434  *	Each notification that comes in should terminate the task (corpse).
2435  */
2436 static void
task_port_no_senders(ipc_port_t port,__unused mach_port_mscount_t mscount)2437 task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
2438 {
2439 	task_t task = ipc_kobject_get_locked(port, IKOT_TASK_CONTROL);
2440 
2441 	assert(task != TASK_NULL);
2442 	assert(task_is_a_corpse(task));
2443 
2444 	/* Remove the task from global corpse task list */
2445 	task_remove_from_corpse_task_list(task);
2446 
2447 	task_clear_corpse(task);
2448 	task_terminate_internal(task);
2449 }
2450 
/*
 *	task_port_with_flavor_no_senders
 *
 *	Called whenever the Mach port system detects no-senders on
 *	the task inspect or read port. These ports are allocated lazily and
 *	should be deallocated here when there are no senders remaining.
 */
static void
task_port_with_flavor_no_senders(
	ipc_port_t          port,
	mach_port_mscount_t mscount __unused)
{
	task_t task;
	mach_task_flavor_t flavor;
	ipc_kobject_type_t kotype;

	ip_mq_lock(port);
	/* A send right minted after the notification was queued makes it stale. */
	if (port->ip_srights > 0) {
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
	task = ipc_kobject_get_locked(port, kotype);
	if (task != TASK_NULL) {
		/* Hold a task ref across the unlocked window below. */
		task_reference(task);
	}
	ip_mq_unlock(port);

	if (task == TASK_NULL) {
		/* The task is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	/* Map the kobject type back to the itk_task_ports[] slot it occupies. */
	if (kotype == IKOT_TASK_READ) {
		flavor = TASK_FLAVOR_READ;
	} else {
		flavor = TASK_FLAVOR_INSPECT;
	}

	/* Lock order as written here: itk lock first, then the port lock. */
	itk_lock(task);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_task_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	if (!ip_active(port) ||
	    task->itk_task_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		itk_unlock(task);
		task_deallocate(task);
		return;
	}

	assert(task->itk_task_ports[flavor] == port);
	task->itk_task_ports[flavor] = IP_NULL;
	itk_unlock(task);

	/* Destroy the port; this call also releases the port lock. */
	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	task_deallocate(task);
}
2524 
/*
 *	task_wait_till_threads_terminate_locked
 *
 *	Wait till all the threads in the task are terminated.
 *	Might release the task lock and re-acquire it.
 */
void
task_wait_till_threads_terminate_locked(task_t task)
{
	/* wait for all the threads in the task to terminate */
	while (task->active_thread_count != 0) {
		/* Queue the wait before dropping the lock so a wakeup cannot be missed. */
		assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		/* Re-acquire and re-check: more threads may still be exiting. */
		task_lock(task);
	}
}
2543 
/*
 *	task_duplicate_map_and_threads
 *
 *	Copy vmmap of source task.
 *	Copy active threads from source task to destination task.
 *	Source task would be suspended during the copy.
 *
 *	On KERN_SUCCESS the caller receives:
 *	  *thread_ret    - the corpse thread matching the current thread
 *	                   (or the first duplicated thread), with a reference.
 *	  *udata_buffer  - kalloc'd array of kqueue udata pointers (caller frees).
 *	  *size          - byte size of that buffer.
 *	  *num_udata     - number of valid entries in the buffer.
 */
kern_return_t
task_duplicate_map_and_threads(
	task_t task,
	void *p,
	task_t new_task,
	thread_t *thread_ret,
	uint64_t **udata_buffer,
	int *size,
	int *num_udata,
	bool for_exception)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state, use the internal
	 * variant so that no user-space process can resume
	 * the task from under us
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	if (memorystatus_allowed_vm_map_fork(task)) {
		/* Setup new task's vmmap, switch from parent task's map to it COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		if (new_task->map) {
			vm_map_deallocate(oldmap);

			/* copy ledgers that impact the memory footprint */
			vm_map_copy_footprint_ledgers(task, new_task);

			/* Get all the udata pointers from kqueue */
			est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
			if (est_knotes > 0) {
				/* Over-allocate by 32 slots to absorb knotes added since the estimate. */
				buf_size = (est_knotes + 32) * sizeof(uint64_t);
				buffer = kalloc_data(buf_size, Z_WAITOK);
				num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
				if (num_knotes > est_knotes + 32) {
					num_knotes = est_knotes + 32;
				}
			}
		} else {
			/* vm_map_fork failed; restore the destination's original map. */
			new_task->map = oldmap;
#if DEVELOPMENT || DEBUG
			memorystatus_abort_vm_map_fork(task);
#endif
			task_resume_internal(task);
			return KERN_NO_SPACE;
		}
	} else if (!for_exception) {
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_NO_SPACE;
	}

	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		kfree_data(buffer, buf_size);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		/* New threads may have appeared since the snapshot; cap at array size. */
		if (array_count >= active_thread_count) {
			break;
		}

		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	/* Create a corpse thread for each snapshotted source thread and copy state. */
	for (i = 0; i < array_count; i++) {
		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of current thread in corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			/* Mark the copy as failed but keep going with remaining threads. */
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(get_bsdthread_info(new_thread),
		    get_bsdthread_info(thread_array[i]));
		new_thread->thread_tag = thread_array[i]->thread_tag;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	/* Drop the snapshot references taken under the task lock above. */
	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree_type(thread_t, active_thread_count, thread_array);

	if (kr == KERN_SUCCESS) {
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		/* On failure, release everything we would have handed to the caller. */
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		kfree_data(buffer, buf_size);
	}

	return kr;
}
2730 
2731 #if CONFIG_SECLUDED_MEMORY
2732 extern void task_set_can_use_secluded_mem_locked(
2733 	task_t          task,
2734 	boolean_t       can_use_secluded_mem);
2735 #endif /* CONFIG_SECLUDED_MEMORY */
2736 
2737 #if MACH_ASSERT
2738 int debug4k_panic_on_terminate = 0;
2739 #endif /* MACH_ASSERT */
/*
 *	task_terminate_internal
 *
 *	Tear down a (non-kernel) task: mark it inactive, terminate its
 *	threads, destroy its IPC space and synchronizers, dismantle its
 *	address space, and move it to the terminated_tasks list.
 *	Returns KERN_FAILURE if the task is already terminating or is
 *	marked for corpse reporting.
 */
kern_return_t
task_terminate_internal(
	task_t                  task)
{
	thread_t                        thread, self;
	task_t                          self_task;
	boolean_t                       interrupt_save;
	int                             pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = current_task();

	/*
	 *	Get the task locked and make sure that we are not racing
	 *	with someone else trying to terminate us.
	 *	Locks are taken in ascending address order to avoid deadlock
	 *	when two tasks terminate each other concurrently.
	 */
	if (task == self_task) {
		task_lock(task);
	} else if (task < self_task) {
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	/* Revoke any secluded-memory entitlement state before teardown. */
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 *	Task is already being terminated.
		 *	Just return an error. If we are dying, this will
		 *	just get us to our AST special handler and that
		 *	will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (task_corpse_pending_report(task)) {
		/*
		 *	Task is marked for reporting as corpse.
		 *	Just return an error. This will
		 *	just get us to our AST special handler and that
		 *	will get us to finish the path to death
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (self_task != task) {
		task_unlock(self_task);
	}

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 *	Indicate that we want all the threads to stop executing
	 *	at user space by holding the task (we would have held
	 *	each thread independently in thread_terminate_internal -
	 *	but this way we may be more likely to already find it
	 *	held there).  Mark the task inactive, and prevent
	 *	further task operations via the task port.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 *	Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
		pid = proc_pid(task->bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

// PR-17045188: Revisit implementation
//        task_partial_reap(task, pid);

#if CONFIG_TASKWATCH
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_TASKWATCH */

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Clear the watchport boost on the task.
	 */
	task_remove_turnstile_watchports(task);

	/*
	 *	Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped.  To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explictly here.
	 */

	vm_map_lock(task->map);
	vm_map_disable_hole_optimization(task->map);
	vm_map_unlock(task->map);

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	if (task->bsd_info && !task_is_exec_copy(task)) {
		pid = proc_pid(task->bsd_info);
		proc_name_kdp(task->bsd_info, procname, sizeof(procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof(procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
		DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
		if (debug4k_panic_on_terminate) {
			panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
		}
	}
#endif /* MACH_ASSERT */

	vm_map_terminate(task->map);

	/* release our shared region */
	vm_shared_region_set(task, NULL);

#if __has_feature(ptrauth_calls)
	task_set_shared_region_id(task, NULL);
#endif /* __has_feature(ptrauth_calls) */

	/* Move the task from the live list to the terminated list. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if KPC
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
		kpc_force_all_ctrs(task, 0);
	}
#endif /* KPC */

#if CONFIG_COALITIONS
	/*
	 * Leave the coalition for corpse task or task that
	 * never had any active threads (e.g. fork, exec failure).
	 * For task with active threads, the task will be removed
	 * from coalition by last terminating thread.
	 */
	if (task->active_thread_count == 0) {
		coalitions_remove_task(task);
	}
#endif

#if CONFIG_FREEZE
	extern int      vm_compressor_available;
	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
		task_disown_frozen_csegs(task);
		assert(queue_empty(&task->task_frozen_cseg_q));
	}
#endif /* CONFIG_FREEZE */


	/*
	 * Get rid of the task active reference on itself.
	 */
	task_deallocate_grp(task, TASK_GRP_INTERNAL);

	return KERN_SUCCESS;
}
2991 
2992 void
tasks_system_suspend(boolean_t suspend)2993 tasks_system_suspend(boolean_t suspend)
2994 {
2995 	task_t task;
2996 
2997 	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
2998 	    (suspend ? DBG_FUNC_START : DBG_FUNC_END));
2999 
3000 	lck_mtx_lock(&tasks_threads_lock);
3001 	assert(tasks_suspend_state != suspend);
3002 	tasks_suspend_state = suspend;
3003 	queue_iterate(&tasks, task, task_t, tasks) {
3004 		if (task == kernel_task) {
3005 			continue;
3006 		}
3007 		suspend ? task_suspend_internal(task) : task_resume_internal(task);
3008 	}
3009 	lck_mtx_unlock(&tasks_threads_lock);
3010 }
3011 
3012 /*
3013  * task_start_halt:
3014  *
3015  *      Shut the current task down (except for the current thread) in
3016  *	preparation for dramatic changes to the task (probably exec).
3017  *	We hold the task and mark all other threads in the task for
3018  *	termination.
3019  */
3020 kern_return_t
task_start_halt(task_t task)3021 task_start_halt(task_t task)
3022 {
3023 	kern_return_t kr = KERN_SUCCESS;
3024 	task_lock(task);
3025 	kr = task_start_halt_locked(task, FALSE);
3026 	task_unlock(task);
3027 	return kr;
3028 }
3029 
/*
 *	task_start_halt_locked
 *
 *	Core of task_start_halt(): mark the task halting, hold all threads,
 *	and terminate every thread except the caller.  When
 *	should_mark_corpse is set, threads are additionally flagged for
 *	corpse inspection.  Called with the task lock held.
 */
static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	/* Only the task's own thread (or a corpse fork) may halt it. */
	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!should_mark_corpse &&
	    (task->halting || !task->active || !self->active)) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves. If should_mark_corpse is set, the corpse
		 * creation might have raced with exec, let the corpse
		 * creation continue, once the current thread reaches AST
		 * thread in exec will be woken up from task_complete_halt.
		 * Exec will fail cause the proc was marked for exit.
		 * Once the thread in exec reaches AST, it will call proc_exit
		 * and deliver the EXC_CORPSE_NOTIFY.
		 */
		return KERN_FAILURE;
	}

	/* Thread creation will fail after this point of no return. */
	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);
	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);

	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		/*
		 * Remove priority throttles for threads to terminate timely. This has
		 * to be done after task_hold_locked() traps all threads to AST, but before
		 * threads are marked inactive in thread_terminate_internal(). Takes thread
		 * mutex lock.
		 * See: thread_policy_update_tasklocked().
		 */
		proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

		if (should_mark_corpse) {
			/* Flag the thread as inspectable for corpse examination. */
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		if (thread != self) {
			thread_terminate_internal(thread);
		}
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	/* Balance the task_hold_locked() above. */
	task_release_locked(task);

	return KERN_SUCCESS;
}
3103 
3104 
/*
 * task_complete_halt:
 *
 *	Complete task halt by waiting for threads to terminate, then clean
 *	up task resources (VM, port namespace, etc...) and then let the
 *	current thread go in the (practically empty) task context.
 *
 *	Note: task->halting flag is not cleared in order to avoid creation
 *	of new thread in old exec'ed task.
 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 *	Wait for the other threads to get shut down.
	 *      When the last other thread is reaped, we'll be
	 *	woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		task_unlock(task);
	}

	/*
	 *	Give the machine dependent code a chance
	 *	to perform cleanup of task-level resources
	 *	associated with the current thread before
	 *	ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 *	Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 *	Terminate the IPC space.  A long time ago,
	 *	this used to be ipc_space_clean() which would
	 *	keep the space active but hollow it.
	 *
	 *	We really do not need this semantics given
	 *	tasks die with exec now.
	 */
	ipc_space_terminate(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_remove(task->map, task->map->min_offset,
	    task->map->max_offset,
	    /*
	     * Final cleanup:
	     * + no unnesting
	     * + remove immutable mappings
	     * + allow gaps in the range
	     */
	    (VM_MAP_REMOVE_NO_UNNESTING |
	    VM_MAP_REMOVE_IMMUTABLE |
	    VM_MAP_REMOVE_GAPS_OK));

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	iokit_task_terminate(task);
}
3180 
/*
 *	task_hold_locked:
 *
 *	Suspend execution of the specified task.
 *	This is a recursive-style suspension of the task, a count of
 *	suspends is maintained.
 *
 *	CONDITIONS: the task is locked and active.
 */
void
task_hold_locked(
	task_t          task)
{
	thread_t        thread;

	assert(task->active);

	/* Nested holds just bump the count; only the first hold does the work. */
	if (task->suspend_count++ > 0) {
		return;
	}

	/* Let the BSD workqueue subsystem know the process is suspended. */
	if (task->bsd_info) {
		workq_proc_suspended(task->bsd_info);
	}

	/*
	 *	Iterate through all the threads and hold them.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_hold(thread);
		thread_mtx_unlock(thread);
	}
}
3215 
3216 /*
3217  *	task_hold:
3218  *
3219  *	Same as the internal routine above, except that is must lock
3220  *	and verify that the task is active.  This differs from task_suspend
3221  *	in that it places a kernel hold on the task rather than just a
3222  *	user-level hold.  This keeps users from over resuming and setting
3223  *	it running out from under the kernel.
3224  *
3225  *      CONDITIONS: the caller holds a reference on the task
3226  */
3227 kern_return_t
task_hold(task_t task)3228 task_hold(
3229 	task_t          task)
3230 {
3231 	if (task == TASK_NULL) {
3232 		return KERN_INVALID_ARGUMENT;
3233 	}
3234 
3235 	task_lock(task);
3236 
3237 	if (!task->active) {
3238 		task_unlock(task);
3239 
3240 		return KERN_FAILURE;
3241 	}
3242 
3243 	task_hold_locked(task);
3244 	task_unlock(task);
3245 
3246 	return KERN_SUCCESS;
3247 }
3248 
3249 kern_return_t
task_wait(task_t task,boolean_t until_not_runnable)3250 task_wait(
3251 	task_t          task,
3252 	boolean_t       until_not_runnable)
3253 {
3254 	if (task == TASK_NULL) {
3255 		return KERN_INVALID_ARGUMENT;
3256 	}
3257 
3258 	task_lock(task);
3259 
3260 	if (!task->active) {
3261 		task_unlock(task);
3262 
3263 		return KERN_FAILURE;
3264 	}
3265 
3266 	task_wait_locked(task, until_not_runnable);
3267 	task_unlock(task);
3268 
3269 	return KERN_SUCCESS;
3270 }
3271 
/*
 *	task_wait_locked:
 *
 *	Wait for all threads in task to stop.
 *
 * Conditions:
 *	Called with task locked, active, and held.
 */
void
task_wait_locked(
	task_t          task,
	boolean_t               until_not_runnable)
{
	thread_t        thread, self;

	assert(task->active);
	assert(task->suspend_count > 0);

	self = current_thread();

	/*
	 *	Iterate through all the threads and wait for them to
	 *	stop.  Do not wait for the current thread if it is within
	 *	the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != self) {
			thread_wait(thread, until_not_runnable);
		}
	}
}
3303 
/*
 *	task_is_app_suspended
 *
 *	Returns TRUE if the task is currently pid-suspended
 *	(task->pidsuspended is set).
 */
boolean_t
task_is_app_suspended(task_t task)
{
	return task->pidsuspended;
}
3309 
/*
 *	task_release_locked:
 *
 *	Release a kernel hold on a task.
 *
 *      CONDITIONS: the task is locked and active
 */
void
task_release_locked(
	task_t          task)
{
	thread_t        thread;

	assert(task->active);
	assert(task->suspend_count > 0);

	/* Only the last release actually resumes the task's threads. */
	if (--task->suspend_count > 0) {
		return;
	}

	/* Let the BSD workqueue subsystem know the process is running again. */
	if (task->bsd_info) {
		workq_proc_resumed(task->bsd_info);
	}

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_release(thread);
		thread_mtx_unlock(thread);
	}
}
3340 
3341 /*
3342  *	task_release:
3343  *
3344  *	Same as the internal routine above, except that it must lock
3345  *	and verify that the task is active.
3346  *
3347  *      CONDITIONS: The caller holds a reference to the task
3348  */
3349 kern_return_t
task_release(task_t task)3350 task_release(
3351 	task_t          task)
3352 {
3353 	if (task == TASK_NULL) {
3354 		return KERN_INVALID_ARGUMENT;
3355 	}
3356 
3357 	task_lock(task);
3358 
3359 	if (!task->active) {
3360 		task_unlock(task);
3361 
3362 		return KERN_FAILURE;
3363 	}
3364 
3365 	task_release_locked(task);
3366 	task_unlock(task);
3367 
3368 	return KERN_SUCCESS;
3369 }
3370 
/*
 *	task_threads_internal
 *
 *	Build an array of referenced thread ports for every thread in the
 *	task, converted to the requested flavor (control/read/inspect).
 *	On success *threads_out is a kalloc'd array the caller owns and
 *	*countp is its length.
 */
static kern_return_t
task_threads_internal(
	task_t                      task,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *countp,
	mach_thread_flavor_t        flavor)
{
	mach_msg_type_number_t  actual, count, count_needed;
	thread_t               *thread_list;
	thread_t                thread;
	unsigned int            i;

	count = 0;
	thread_list = NULL;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(flavor <= THREAD_FLAVOR_INSPECT);

	/*
	 * Allocation loop: size the array outside the task lock and retry
	 * until the allocation is large enough for the current thread count.
	 */
	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			kfree_type(thread_t, count, thread_list);
			return KERN_FAILURE;
		}

		count_needed = actual = task->thread_count;
		if (count_needed <= count) {
			break;
		}

		/* unlock the task and allocate more memory */
		task_unlock(task);

		kfree_type(thread_t, count, thread_list);
		count = count_needed;
		thread_list = kalloc_type(thread_t, count, Z_WAITOK);

		if (thread_list == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* Task lock held: snapshot the threads, taking a ref on each. */
	i = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		assert(i < actual);
		thread_reference(thread);
		thread_list[i++] = thread;
	}

	count_needed = actual;

	/* can unlock task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return null pointer and deallocate memory */

		*threads_out = NULL;
		*countp = 0;
		kfree_type(thread_t, count, thread_list);
	} else {
		/* if we allocated too much, must copy */
		if (count_needed < count) {
			void *newaddr;

			newaddr = kalloc_type(thread_t, count_needed, Z_WAITOK);
			if (newaddr == NULL) {
				for (i = 0; i < actual; ++i) {
					thread_deallocate(thread_list[i]);
				}
				kfree_type(thread_t, count, thread_list);
				return KERN_RESOURCE_SHORTAGE;
			}

			bcopy(thread_list, newaddr, count_needed * sizeof(thread_t));
			kfree_type(thread_t, count, thread_list);
			thread_list = (thread_t *)newaddr;
		}

		*threads_out = thread_list;
		*countp = actual;

		/* do the conversion that Mig should handle */

		/*
		 * Convert each thread ref to a port of the requested flavor
		 * in place; the conversion consumes the thread reference.
		 */
		switch (flavor) {
		case THREAD_FLAVOR_CONTROL:
			if (task == current_task()) {
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
				}
			} else {
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
				}
			}
			break;
		case THREAD_FLAVOR_READ:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
			}
			break;
		case THREAD_FLAVOR_INSPECT:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
			}
			break;
		}
	}

	return KERN_SUCCESS;
}
3487 
/*
 *	task_threads
 *
 *	In-kernel entry point: enumerate the task's threads as
 *	control-flavor thread ports.
 */
kern_return_t
task_threads(
	task_t                      task,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *count)
{
	return task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
}
3496 
3497 
/*
 *	task_threads_from_user
 *
 *	MIG entry point for task_threads: converts the incoming port,
 *	then selects the thread-port flavor to vend based on the kind of
 *	task port the caller presented (control/read/inspect), so callers
 *	never receive more capable thread ports than their task port allows.
 */
kern_return_t
task_threads_from_user(
	mach_port_t                 port,
	thread_act_array_t         *threads_out,
	mach_msg_type_number_t     *count)
{
	ipc_kobject_type_t kotype;
	kern_return_t kr;

	task_t task = convert_port_to_task_inspect_no_eval(port);

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	kotype = ip_kotype(port);

	switch (kotype) {
	case IKOT_TASK_CONTROL:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
		break;
	case IKOT_TASK_READ:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
		break;
	case IKOT_TASK_INSPECT:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
		break;
	default:
		/* convert_port_to_task_inspect_no_eval() should rule this out. */
		panic("strange kobject type");
		break;
	}

	/* Drop the ref taken by the port conversion above. */
	task_deallocate(task);
	return kr;
}
3533 
/*
 * Modes for place_task_hold() / release_task_hold().
 */
#define TASK_HOLD_NORMAL        0       /* token-based hold: task_suspend_internal()/task_resume_internal() */
#define TASK_HOLD_PIDSUSPEND    1       /* hold placed/released by task_pidsuspend()/task_pidresume() */
#define TASK_HOLD_LEGACY        2       /* old-style task_suspend(): one naked send right per hold */
#define TASK_HOLD_LEGACY_ALL    3       /* release every outstanding legacy hold at once (no-senders path) */
3538 
/*
 *	place_task_hold:
 *
 *	Place one user-level hold of the given TASK_HOLD_* mode on the task.
 *	The first hold also places the single kernel-level hold that actually
 *	stops the threads; subsequent holds only bump the counters.
 *
 *	Conditions:
 *		The task is locked by the caller.
 */
static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
	    task_pid(task),
	    task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
	    task->user_stop_count, task->user_stop_count + 1);

#if MACH_ASSERT
	/* Debug-only accounting of suspensions issued by the current task. */
	current_task()->suspends_outstanding++;
#endif

	if (mode == TASK_HOLD_LEGACY) {
		/* Legacy holds are counted separately so they can be bulk-released. */
		task->legacy_stop_count++;
	}

	if (task->user_stop_count++ > 0) {
		/*
		 *	If the stop count was positive, the task is
		 *	already stopped and we can exit.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold).  We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return KERN_SUCCESS;
}
3585 
/*
 *	release_task_hold:
 *
 *	Release one user-level hold of the given TASK_HOLD_* mode (or all
 *	legacy holds for TASK_HOLD_LEGACY_ALL).  When the last user-level
 *	hold goes away, the kernel-level hold is released too.
 *
 *	Conditions:
 *		The task is locked by the caller.
 */
static kern_return_t
release_task_hold(
	task_t          task,
	int                     mode)
{
	boolean_t release = FALSE;

	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	if (mode == TASK_HOLD_PIDSUSPEND) {
		if (task->pidsuspended == FALSE) {
			return KERN_FAILURE;
		}
		task->pidsuspended = FALSE;
	}

	/*
	 * A still-pending pidsuspend accounts for one unit of user_stop_count
	 * that this call may not release.
	 */
	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
		    task->user_stop_count, mode, task->legacy_stop_count);

#if MACH_ASSERT
		/*
		 * This is obviously not robust; if we suspend one task and then resume a different one,
		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
		 * or buggy suspender.
		 */
		current_task()->suspends_outstanding--;
#endif

		if (mode == TASK_HOLD_LEGACY_ALL) {
			if (task->legacy_stop_count >= task->user_stop_count) {
				task->user_stop_count = 0;
				release = TRUE;
			} else {
				task->user_stop_count -= task->legacy_stop_count;
			}
			task->legacy_stop_count = 0;
		} else {
			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
				task->legacy_stop_count--;
			}
			if (--task->user_stop_count == 0) {
				release = TRUE;
			}
		}
	} else {
		/* Nothing to release: more resumes than suspends. */
		return KERN_FAILURE;
	}

	/*
	 *	Release the task if necessary.
	 */
	if (release) {
		task_release_locked(task);
	}

	return KERN_SUCCESS;
}
3653 
3654 boolean_t
get_task_suspended(task_t task)3655 get_task_suspended(task_t task)
3656 {
3657 	return 0 != task->user_stop_count;
3658 }
3659 
3660 /*
3661  *	task_suspend:
3662  *
3663  *	Implement an (old-fashioned) user-level suspension on a task.
3664  *
3665  *	Because the user isn't expecting to have to manage a suspension
3666  *	token, we'll track it for him in the kernel in the form of a naked
3667  *	send right to the task's resume port.  All such send rights
3668  *	account for a single suspension against the task (unlike task_suspend2()
3669  *	where each caller gets a unique suspension count represented by a
3670  *	unique send-once right).
3671  *
3672  * Conditions:
3673  *      The caller holds a reference to the task
3674  */
3675 kern_return_t
task_suspend(task_t task)3676 task_suspend(
3677 	task_t          task)
3678 {
3679 	kern_return_t                   kr;
3680 	mach_port_t                     port;
3681 	mach_port_name_t                name;
3682 
3683 	if (task == TASK_NULL || task == kernel_task) {
3684 		return KERN_INVALID_ARGUMENT;
3685 	}
3686 
3687 	/*
3688 	 * place a legacy hold on the task.
3689 	 */
3690 	task_lock(task);
3691 	kr = place_task_hold(task, TASK_HOLD_LEGACY);
3692 	task_unlock(task);
3693 
3694 	if (kr != KERN_SUCCESS) {
3695 		return kr;
3696 	}
3697 
3698 	/*
3699 	 * Claim a send right on the task resume port, and request a no-senders
3700 	 * notification on that port (if none outstanding).
3701 	 */
3702 	itk_lock(task);
3703 	(void)ipc_kobject_make_send_lazy_alloc_port((ipc_port_t *) &task->itk_resume,
3704 	    (ipc_kobject_t)task, IKOT_TASK_RESUME, IPC_KOBJECT_PTRAUTH_STORE,
3705 	    OS_PTRAUTH_DISCRIMINATOR("task.itk_resume"));
3706 	port = task->itk_resume; /* donates send right */
3707 	itk_unlock(task);
3708 
3709 	/*
3710 	 * Copyout the send right into the calling task's IPC space.  It won't know it is there,
3711 	 * but we'll look it up when calling a traditional resume.  Any IPC operations that
3712 	 * deallocate the send right will auto-release the suspension.
3713 	 */
3714 	if (IP_VALID(port)) {
3715 		kr = ipc_object_copyout(current_space(), ip_to_object(port),
3716 		    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
3717 		    NULL, NULL, &name);
3718 	} else {
3719 		kr = KERN_SUCCESS;
3720 	}
3721 	if (kr != KERN_SUCCESS) {
3722 		printf("warning: %s(%d) failed to copyout suspension "
3723 		    "token for pid %d with error: %d\n",
3724 		    proc_name_address(current_task()->bsd_info),
3725 		    proc_pid(current_task()->bsd_info),
3726 		    task_pid(task), kr);
3727 	}
3728 
3729 	return kr;
3730 }
3731 
3732 /*
3733  *	task_resume:
3734  *		Release a user hold on a task.
3735  *
3736  * Conditions:
3737  *		The caller holds a reference to the task
3738  */
3739 kern_return_t
task_resume(task_t task)3740 task_resume(
3741 	task_t  task)
3742 {
3743 	kern_return_t    kr;
3744 	mach_port_name_t resume_port_name;
3745 	ipc_entry_t              resume_port_entry;
3746 	ipc_space_t              space = current_task()->itk_space;
3747 
3748 	if (task == TASK_NULL || task == kernel_task) {
3749 		return KERN_INVALID_ARGUMENT;
3750 	}
3751 
3752 	/* release a legacy task hold */
3753 	task_lock(task);
3754 	kr = release_task_hold(task, TASK_HOLD_LEGACY);
3755 	task_unlock(task);
3756 
3757 	itk_lock(task); /* for itk_resume */
3758 	is_write_lock(space); /* spin lock */
3759 	if (is_active(space) && IP_VALID(task->itk_resume) &&
3760 	    ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3761 		/*
3762 		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3763 		 * we are holding one less legacy hold on the task from this caller.  If the release failed,
3764 		 * go ahead and drop all the rights, as someone either already released our holds or the task
3765 		 * is gone.
3766 		 */
3767 		itk_unlock(task);
3768 		if (kr == KERN_SUCCESS) {
3769 			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3770 		} else {
3771 			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3772 		}
3773 		/* space unlocked */
3774 	} else {
3775 		itk_unlock(task);
3776 		is_write_unlock(space);
3777 		if (kr == KERN_SUCCESS) {
3778 			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3779 			    proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3780 			    task_pid(task));
3781 		}
3782 	}
3783 
3784 	return kr;
3785 }
3786 
3787 /*
3788  * Suspend the target task.
3789  * Making/holding a token/reference/port is the callers responsibility.
3790  */
3791 kern_return_t
task_suspend_internal(task_t task)3792 task_suspend_internal(task_t task)
3793 {
3794 	kern_return_t    kr;
3795 
3796 	if (task == TASK_NULL || task == kernel_task) {
3797 		return KERN_INVALID_ARGUMENT;
3798 	}
3799 
3800 	task_lock(task);
3801 	kr = place_task_hold(task, TASK_HOLD_NORMAL);
3802 	task_unlock(task);
3803 	return kr;
3804 }
3805 
3806 /*
3807  * Suspend the target task, and return a suspension token. The token
3808  * represents a reference on the suspended task.
3809  */
3810 static kern_return_t
task_suspend2_grp(task_t task,task_suspension_token_t * suspend_token,task_grp_t grp)3811 task_suspend2_grp(
3812 	task_t                  task,
3813 	task_suspension_token_t *suspend_token,
3814 	task_grp_t              grp)
3815 {
3816 	kern_return_t    kr;
3817 
3818 	kr = task_suspend_internal(task);
3819 	if (kr != KERN_SUCCESS) {
3820 		*suspend_token = TASK_NULL;
3821 		return kr;
3822 	}
3823 
3824 	/*
3825 	 * Take a reference on the target task and return that to the caller
3826 	 * as a "suspension token," which can be converted into an SO right to
3827 	 * the now-suspended task's resume port.
3828 	 */
3829 	task_reference_grp(task, grp);
3830 	*suspend_token = task;
3831 
3832 	return KERN_SUCCESS;
3833 }
3834 
/* MIG-originated task_suspend2: attributes the reference to the MIG group. */
kern_return_t
task_suspend2_mig(
	task_t                  task,
	task_suspension_token_t *suspend_token)
{
	return task_suspend2_grp(task, suspend_token, TASK_GRP_MIG);
}
3842 
/* Externally-originated task_suspend2: attributes the reference to the external group. */
kern_return_t
task_suspend2_external(
	task_t                  task,
	task_suspension_token_t *suspend_token)
{
	return task_suspend2_grp(task, suspend_token, TASK_GRP_EXTERNAL);
}
3850 
3851 /*
3852  * Resume the task
3853  * (reference/token/port management is caller's responsibility).
3854  */
3855 kern_return_t
task_resume_internal(task_suspension_token_t task)3856 task_resume_internal(
3857 	task_suspension_token_t         task)
3858 {
3859 	kern_return_t kr;
3860 
3861 	if (task == TASK_NULL || task == kernel_task) {
3862 		return KERN_INVALID_ARGUMENT;
3863 	}
3864 
3865 	task_lock(task);
3866 	kr = release_task_hold(task, TASK_HOLD_NORMAL);
3867 	task_unlock(task);
3868 	return kr;
3869 }
3870 
3871 /*
3872  * Resume the task using a suspension token. Consumes the token's ref.
3873  */
3874 static kern_return_t
task_resume2_grp(task_suspension_token_t task,task_grp_t grp)3875 task_resume2_grp(
3876 	task_suspension_token_t         task,
3877 	task_grp_t                      grp)
3878 {
3879 	kern_return_t kr;
3880 
3881 	kr = task_resume_internal(task);
3882 	task_suspension_token_deallocate_grp(task, grp);
3883 
3884 	return kr;
3885 }
3886 
/* MIG-originated task_resume2: attributes the dropped reference to the MIG group. */
kern_return_t
task_resume2_mig(
	task_suspension_token_t         task)
{
	return task_resume2_grp(task, TASK_GRP_MIG);
}
3893 
/* Externally-originated task_resume2: attributes the dropped reference to the external group. */
kern_return_t
task_resume2_external(
	task_suspension_token_t         task)
{
	return task_resume2_grp(task, TASK_GRP_EXTERNAL);
}
3900 
/*
 *	task_suspension_no_senders:
 *
 *	No-senders notification on a task's resume port: every legacy
 *	suspension token (naked send right made by task_suspend()) has
 *	been deallocated, so drop any remaining legacy holds.
 */
static void
task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	task_t task = convert_port_to_task_suspension_token(port);
	kern_return_t kr;

	if (task == TASK_NULL) {
		return;
	}

	if (task == kernel_task) {
		task_suspension_token_deallocate(task);
		return;
	}

	task_lock(task);

	/*
	 * NOTE(review): KERN_FAILURE from ipc_kobject_nsrequest() appears to
	 * mean no new senders raced in, making the notification final --
	 * confirm against the ipc_kobject_nsrequest() contract.
	 */
	kr = ipc_kobject_nsrequest(port, mscount, NULL);
	if (kr == KERN_FAILURE) {
		/* release all the [remaining] outstanding legacy holds */
		release_task_hold(task, TASK_HOLD_LEGACY_ALL);
	}

	task_unlock(task);

	task_suspension_token_deallocate(task);         /* drop token reference */
}
3928 
3929 /*
3930  * Fires when a send once made
3931  * by convert_task_suspension_token_to_port() dies.
3932  */
3933 void
task_suspension_send_once(ipc_port_t port)3934 task_suspension_send_once(ipc_port_t port)
3935 {
3936 	task_t task = convert_port_to_task_suspension_token(port);
3937 
3938 	if (task == TASK_NULL || task == kernel_task) {
3939 		return;         /* nothing to do */
3940 	}
3941 
3942 	/* release the hold held by this specific send-once right */
3943 	task_lock(task);
3944 	release_task_hold(task, TASK_HOLD_NORMAL);
3945 	task_unlock(task);
3946 
3947 	task_suspension_token_deallocate(task);         /* drop token reference */
3948 }
3949 
3950 static kern_return_t
task_pidsuspend_locked(task_t task)3951 task_pidsuspend_locked(task_t task)
3952 {
3953 	kern_return_t kr;
3954 
3955 	if (task->pidsuspended) {
3956 		kr = KERN_FAILURE;
3957 		goto out;
3958 	}
3959 
3960 	task->pidsuspended = TRUE;
3961 
3962 	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3963 	if (kr != KERN_SUCCESS) {
3964 		task->pidsuspended = FALSE;
3965 	}
3966 out:
3967 	return kr;
3968 }
3969 
3970 
3971 /*
3972  *	task_pidsuspend:
3973  *
3974  *	Suspends a task by placing a hold on its threads.
3975  *
3976  * Conditions:
3977  *      The caller holds a reference to the task
3978  */
3979 kern_return_t
task_pidsuspend(task_t task)3980 task_pidsuspend(
3981 	task_t          task)
3982 {
3983 	kern_return_t    kr;
3984 
3985 	if (task == TASK_NULL || task == kernel_task) {
3986 		return KERN_INVALID_ARGUMENT;
3987 	}
3988 
3989 	task_lock(task);
3990 
3991 	kr = task_pidsuspend_locked(task);
3992 
3993 	task_unlock(task);
3994 
3995 	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3996 		iokit_task_app_suspended_changed(task);
3997 	}
3998 
3999 	return kr;
4000 }
4001 
4002 /*
4003  *	task_pidresume:
4004  *		Resumes a previously suspended task.
4005  *
4006  * Conditions:
4007  *		The caller holds a reference to the task
4008  */
4009 kern_return_t
task_pidresume(task_t task)4010 task_pidresume(
4011 	task_t  task)
4012 {
4013 	kern_return_t    kr;
4014 
4015 	if (task == TASK_NULL || task == kernel_task) {
4016 		return KERN_INVALID_ARGUMENT;
4017 	}
4018 
4019 	task_lock(task);
4020 
4021 #if CONFIG_FREEZE
4022 
4023 	while (task->changing_freeze_state) {
4024 		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4025 		task_unlock(task);
4026 		thread_block(THREAD_CONTINUE_NULL);
4027 
4028 		task_lock(task);
4029 	}
4030 	task->changing_freeze_state = TRUE;
4031 #endif
4032 
4033 	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
4034 
4035 	task_unlock(task);
4036 
4037 	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
4038 		iokit_task_app_suspended_changed(task);
4039 	}
4040 
4041 #if CONFIG_FREEZE
4042 
4043 	task_lock(task);
4044 
4045 	if (kr == KERN_SUCCESS) {
4046 		task->frozen = FALSE;
4047 	}
4048 	task->changing_freeze_state = FALSE;
4049 	thread_wakeup(&task->changing_freeze_state);
4050 
4051 	task_unlock(task);
4052 #endif
4053 
4054 	return kr;
4055 }
4056 
4057 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4058 
4059 /*
4060  *	task_add_turnstile_watchports:
4061  *		Setup watchports to boost the main thread of the task.
4062  *
4063  *	Arguments:
4064  *		task: task being spawned
4065  *		thread: main thread of task
4066  *		portwatch_ports: array of watchports
4067  *		portwatch_count: number of watchports
4068  *
4069  *	Conditions:
4070  *		Nothing locked.
4071  */
4072 void
task_add_turnstile_watchports(task_t task,thread_t thread,ipc_port_t * portwatch_ports,uint32_t portwatch_count)4073 task_add_turnstile_watchports(
4074 	task_t          task,
4075 	thread_t        thread,
4076 	ipc_port_t      *portwatch_ports,
4077 	uint32_t        portwatch_count)
4078 {
4079 	struct task_watchports *watchports = NULL;
4080 	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
4081 	os_ref_count_t refs;
4082 
4083 	/* Check if the task has terminated */
4084 	if (!task->active) {
4085 		return;
4086 	}
4087 
4088 	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
4089 
4090 	watchports = task_watchports_alloc_init(task, thread, portwatch_count);
4091 
4092 	/* Lock the ipc space */
4093 	is_write_lock(task->itk_space);
4094 
4095 	/* Setup watchports to boost the main thread */
4096 	refs = task_add_turnstile_watchports_locked(task,
4097 	    watchports, previous_elem_array, portwatch_ports,
4098 	    portwatch_count);
4099 
4100 	/* Drop the space lock */
4101 	is_write_unlock(task->itk_space);
4102 
4103 	if (refs == 0) {
4104 		task_watchports_deallocate(watchports);
4105 	}
4106 
4107 	/* Drop the ref on previous_elem_array */
4108 	for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
4109 		task_watchport_elem_deallocate(previous_elem_array[i]);
4110 	}
4111 }
4112 
4113 /*
4114  *	task_remove_turnstile_watchports:
4115  *		Clear all turnstile boost on the task from watchports.
4116  *
4117  *	Arguments:
4118  *		task: task being terminated
4119  *
4120  *	Conditions:
4121  *		Nothing locked.
4122  */
4123 void
task_remove_turnstile_watchports(task_t task)4124 task_remove_turnstile_watchports(
4125 	task_t          task)
4126 {
4127 	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4128 	struct task_watchports *watchports = NULL;
4129 	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
4130 	uint32_t portwatch_count;
4131 
4132 	/* Lock the ipc space */
4133 	is_write_lock(task->itk_space);
4134 
4135 	/* Check if watchport boost exist */
4136 	if (task->watchports == NULL) {
4137 		is_write_unlock(task->itk_space);
4138 		return;
4139 	}
4140 	watchports = task->watchports;
4141 	portwatch_count = watchports->tw_elem_array_count;
4142 
4143 	refs = task_remove_turnstile_watchports_locked(task, watchports,
4144 	    port_freelist);
4145 
4146 	is_write_unlock(task->itk_space);
4147 
4148 	/* Drop all the port references */
4149 	for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
4150 		ip_release(port_freelist[i]);
4151 	}
4152 
4153 	/* Clear the task and thread references for task_watchport */
4154 	if (refs == 0) {
4155 		task_watchports_deallocate(watchports);
4156 	}
4157 }
4158 
4159 /*
4160  *	task_transfer_turnstile_watchports:
4161  *		Transfer all watchport turnstile boost from old task to new task.
4162  *
4163  *	Arguments:
4164  *		old_task: task calling exec
4165  *		new_task: new exec'ed task
4166  *		thread: main thread of new task
4167  *
4168  *	Conditions:
4169  *		Nothing locked.
4170  */
4171 void
task_transfer_turnstile_watchports(task_t old_task,task_t new_task,thread_t new_thread)4172 task_transfer_turnstile_watchports(
4173 	task_t   old_task,
4174 	task_t   new_task,
4175 	thread_t new_thread)
4176 {
4177 	struct task_watchports *old_watchports = NULL;
4178 	struct task_watchports *new_watchports = NULL;
4179 	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
4180 	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
4181 	uint32_t portwatch_count;
4182 
4183 	if (old_task->watchports == NULL || !new_task->active) {
4184 		return;
4185 	}
4186 
4187 	/* Get the watch port count from the old task */
4188 	is_write_lock(old_task->itk_space);
4189 	if (old_task->watchports == NULL) {
4190 		is_write_unlock(old_task->itk_space);
4191 		return;
4192 	}
4193 
4194 	portwatch_count = old_task->watchports->tw_elem_array_count;
4195 	is_write_unlock(old_task->itk_space);
4196 
4197 	new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
4198 
4199 	/* Lock the ipc space for old task */
4200 	is_write_lock(old_task->itk_space);
4201 
4202 	/* Lock the ipc space for new task */
4203 	is_write_lock(new_task->itk_space);
4204 
4205 	/* Check if watchport boost exist */
4206 	if (old_task->watchports == NULL || !new_task->active) {
4207 		is_write_unlock(new_task->itk_space);
4208 		is_write_unlock(old_task->itk_space);
4209 		(void)task_watchports_release(new_watchports);
4210 		task_watchports_deallocate(new_watchports);
4211 		return;
4212 	}
4213 
4214 	old_watchports = old_task->watchports;
4215 	assert(portwatch_count == old_task->watchports->tw_elem_array_count);
4216 
4217 	/* Setup new task watchports */
4218 	new_task->watchports = new_watchports;
4219 
4220 	for (uint32_t i = 0; i < portwatch_count; i++) {
4221 		ipc_port_t port = old_watchports->tw_elem[i].twe_port;
4222 
4223 		if (port == NULL) {
4224 			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4225 			continue;
4226 		}
4227 
4228 		/* Lock the port and check if it has the entry */
4229 		ip_mq_lock(port);
4230 
4231 		task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
4232 
4233 		if (ipc_port_replace_watchport_elem_conditional_locked(port,
4234 		    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
4235 			task_watchport_elem_clear(&old_watchports->tw_elem[i]);
4236 
4237 			task_watchports_retain(new_watchports);
4238 			old_refs = task_watchports_release(old_watchports);
4239 
4240 			/* Check if all ports are cleaned */
4241 			if (old_refs == 0) {
4242 				old_task->watchports = NULL;
4243 			}
4244 		} else {
4245 			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4246 		}
4247 		/* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
4248 	}
4249 
4250 	/* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
4251 	new_refs = task_watchports_release(new_watchports);
4252 	if (new_refs == 0) {
4253 		new_task->watchports = NULL;
4254 	}
4255 
4256 	is_write_unlock(new_task->itk_space);
4257 	is_write_unlock(old_task->itk_space);
4258 
4259 	/* Clear the task and thread references for old_watchport */
4260 	if (old_refs == 0) {
4261 		task_watchports_deallocate(old_watchports);
4262 	}
4263 
4264 	/* Clear the task and thread references for new_watchport */
4265 	if (new_refs == 0) {
4266 		task_watchports_deallocate(new_watchports);
4267 	}
4268 }
4269 
4270 /*
4271  *	task_add_turnstile_watchports_locked:
4272  *		Setup watchports to boost the main thread of the task.
4273  *
4274  *	Arguments:
4275  *		task: task to boost
4276  *		watchports: watchport structure to be attached to the task
4277  *		previous_elem_array: an array of old watchport_elem to be returned to caller
4278  *		portwatch_ports: array of watchports
4279  *		portwatch_count: number of watchports
4280  *
4281  *	Conditions:
4282  *		ipc space of the task locked.
4283  *		returns array of old watchport_elem in previous_elem_array
4284  */
4285 static os_ref_count_t
task_add_turnstile_watchports_locked(task_t task,struct task_watchports * watchports,struct task_watchport_elem ** previous_elem_array,ipc_port_t * portwatch_ports,uint32_t portwatch_count)4286 task_add_turnstile_watchports_locked(
4287 	task_t                      task,
4288 	struct task_watchports      *watchports,
4289 	struct task_watchport_elem  **previous_elem_array,
4290 	ipc_port_t                  *portwatch_ports,
4291 	uint32_t                    portwatch_count)
4292 {
4293 	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4294 
4295 	/* Check if the task is still active */
4296 	if (!task->active) {
4297 		refs = task_watchports_release(watchports);
4298 		return refs;
4299 	}
4300 
4301 	assert(task->watchports == NULL);
4302 	task->watchports = watchports;
4303 
4304 	for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
4305 		ipc_port_t port = portwatch_ports[i];
4306 
4307 		task_watchport_elem_init(&watchports->tw_elem[i], task, port);
4308 		if (port == NULL) {
4309 			task_watchport_elem_clear(&watchports->tw_elem[i]);
4310 			continue;
4311 		}
4312 
4313 		ip_mq_lock(port);
4314 
4315 		/* Check if port is in valid state to be setup as watchport */
4316 		if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
4317 		    &previous_elem_array[j]) != KERN_SUCCESS) {
4318 			task_watchport_elem_clear(&watchports->tw_elem[i]);
4319 			continue;
4320 		}
4321 		/* port unlocked on return */
4322 
4323 		ip_reference(port);
4324 		task_watchports_retain(watchports);
4325 		if (previous_elem_array[j] != NULL) {
4326 			j++;
4327 		}
4328 	}
4329 
4330 	/* Drop the reference on task_watchport struct returned by os_ref_init */
4331 	refs = task_watchports_release(watchports);
4332 	if (refs == 0) {
4333 		task->watchports = NULL;
4334 	}
4335 
4336 	return refs;
4337 }
4338 
4339 /*
4340  *	task_remove_turnstile_watchports_locked:
4341  *		Clear all turnstile boost on the task from watchports.
4342  *
4343  *	Arguments:
4344  *		task: task to remove watchports from
4345  *		watchports: watchports structure for the task
4346  *		port_freelist: array of ports returned with ref to caller
4347  *
4348  *
4349  *	Conditions:
4350  *		ipc space of the task locked.
4351  *		array of ports with refs are returned in port_freelist
4352  */
4353 static os_ref_count_t
task_remove_turnstile_watchports_locked(task_t task,struct task_watchports * watchports,ipc_port_t * port_freelist)4354 task_remove_turnstile_watchports_locked(
4355 	task_t                 task,
4356 	struct task_watchports *watchports,
4357 	ipc_port_t             *port_freelist)
4358 {
4359 	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4360 
4361 	for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
4362 		ipc_port_t port = watchports->tw_elem[i].twe_port;
4363 		if (port == NULL) {
4364 			continue;
4365 		}
4366 
4367 		/* Lock the port and check if it has the entry */
4368 		ip_mq_lock(port);
4369 		if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4370 		    &watchports->tw_elem[i]) == KERN_SUCCESS) {
4371 			task_watchport_elem_clear(&watchports->tw_elem[i]);
4372 			port_freelist[j++] = port;
4373 			refs = task_watchports_release(watchports);
4374 
4375 			/* Check if all ports are cleaned */
4376 			if (refs == 0) {
4377 				task->watchports = NULL;
4378 				break;
4379 			}
4380 		}
4381 		/* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4382 	}
4383 	return refs;
4384 }
4385 
4386 /*
4387  *	task_watchports_alloc_init:
4388  *		Allocate and initialize task watchport struct.
4389  *
4390  *	Conditions:
4391  *		Nothing locked.
4392  */
4393 static struct task_watchports *
task_watchports_alloc_init(task_t task,thread_t thread,uint32_t count)4394 task_watchports_alloc_init(
4395 	task_t        task,
4396 	thread_t      thread,
4397 	uint32_t      count)
4398 {
4399 	struct task_watchports *watchports = kalloc_type(struct task_watchports,
4400 	    struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4401 
4402 	task_reference(task);
4403 	thread_reference(thread);
4404 	watchports->tw_task = task;
4405 	watchports->tw_thread = thread;
4406 	watchports->tw_elem_array_count = count;
4407 	os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4408 
4409 	return watchports;
4410 }
4411 
4412 /*
4413  *	task_watchports_deallocate:
4414  *		Deallocate task watchport struct.
4415  *
4416  *	Conditions:
4417  *		Nothing locked.
4418  */
4419 static void
task_watchports_deallocate(struct task_watchports * watchports)4420 task_watchports_deallocate(
4421 	struct task_watchports *watchports)
4422 {
4423 	uint32_t portwatch_count = watchports->tw_elem_array_count;
4424 
4425 	task_deallocate(watchports->tw_task);
4426 	thread_deallocate(watchports->tw_thread);
4427 	kfree_type(struct task_watchports, struct task_watchport_elem,
4428 	    portwatch_count, watchports);
4429 }
4430 
4431 /*
4432  *	task_watchport_elem_deallocate:
4433  *		Deallocate task watchport element and release its ref on task_watchport.
4434  *
4435  *	Conditions:
4436  *		Nothing locked.
4437  */
4438 void
task_watchport_elem_deallocate(struct task_watchport_elem * watchport_elem)4439 task_watchport_elem_deallocate(
4440 	struct task_watchport_elem *watchport_elem)
4441 {
4442 	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4443 	task_t task = watchport_elem->twe_task;
4444 	struct task_watchports *watchports = NULL;
4445 	ipc_port_t port = NULL;
4446 
4447 	assert(task != NULL);
4448 
4449 	/* Take the space lock to modify the elememt */
4450 	is_write_lock(task->itk_space);
4451 
4452 	watchports = task->watchports;
4453 	assert(watchports != NULL);
4454 
4455 	port = watchport_elem->twe_port;
4456 	assert(port != NULL);
4457 
4458 	task_watchport_elem_clear(watchport_elem);
4459 	refs = task_watchports_release(watchports);
4460 
4461 	if (refs == 0) {
4462 		task->watchports = NULL;
4463 	}
4464 
4465 	is_write_unlock(task->itk_space);
4466 
4467 	ip_release(port);
4468 	if (refs == 0) {
4469 		task_watchports_deallocate(watchports);
4470 	}
4471 }
4472 
4473 /*
4474  *	task_has_watchports:
4475  *		Return TRUE if task has watchport boosts.
4476  *
4477  *	Conditions:
4478  *		Nothing locked.
4479  */
4480 boolean_t
task_has_watchports(task_t task)4481 task_has_watchports(task_t task)
4482 {
4483 	return task->watchports != NULL;
4484 }
4485 
4486 #if DEVELOPMENT || DEBUG
4487 
4488 extern void IOSleep(int);
4489 
/*
 *	task_disconnect_page_mappings: (DEVELOPMENT || DEBUG only)
 *
 *	Repeatedly strip the task's pmap mappings so the task re-faults
 *	its pages, approximating its true working set.  See the detailed
 *	rationale in the body comment.
 */
kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int     n;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * this function is used to strip all of the mappings from
	 * the pmap for the specified task to force the task to
	 * re-fault all of the pages it is actively using... this
	 * allows us to approximate the true working set of the
	 * specified task.  We only engage if at least 1 of the
	 * threads in the task is runnable, but we want to continuously
	 * sweep (at least for a while - I've arbitrarily set the limit at
	 * 100 sweeps to be re-looked at as we gain experience) to get a better
	 * view into what areas within a page are being visited (as opposed to only
	 * seeing the first fault of a page after the task becomes
	 * runnable)...  in the future I may
	 * try to block until awakened by a thread in this task
	 * being made runnable, but for now we'll periodically poll from the
	 * user level debug tool driving the sysctl
	 */
	for (n = 0; n < 100; n++) {
		thread_t        thread;
		boolean_t       runnable;
		boolean_t       do_unnest;
		int             page_count;

		runnable = FALSE;
		do_unnest = FALSE;

		task_lock(task);

		/* Any runnable thread keeps the sweep going. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}
		if (n == 0) {
			task->task_disconnected_count++;
		}

		/* Unnest shared regions only once per task, on the first runnable sweep. */
		if (task->task_unnested == FALSE) {
			if (runnable == TRUE) {
				task->task_unnested = TRUE;
				do_unnest = TRUE;
			}
		}
		task_unlock(task);

		if (runnable == FALSE) {
			break;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
		    task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
		    task, page_count, 0, 0, 0);

		/* Yield briefly every 5 sweeps so we don't monopolize the CPU. */
		if ((n % 5) == 4) {
			IOSleep(1);
		}
	}
	return KERN_SUCCESS;
}
4562 
4563 #endif
4564 
4565 
4566 #if CONFIG_FREEZE
4567 
4568 /*
4569  *	task_freeze:
4570  *
4571  *	Freeze a task.
4572  *
4573  * Conditions:
4574  *      The caller holds a reference to the task
4575  */
4576 extern void             vm_wake_compactor_swapper(void);
4577 extern queue_head_t     c_swapout_list_head;
4578 extern struct freezer_context freezer_context_global;
4579 
kern_return_t
task_freeze(
	task_t    task,
	uint32_t           *purgeable_count,
	uint32_t           *wired_count,
	uint32_t           *clean_count,
	uint32_t           *dirty_count,
	uint32_t           dirty_budget,
	uint32_t           *shared_count,
	int                *freezer_error_code,
	boolean_t          eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	/* The kernel task is never freezable. */
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Serialize against any freeze/thaw transition already in flight
	 * for this task: sleep until it completes, then re-check under
	 * the re-acquired task lock.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	/* A task that is already frozen cannot be frozen again. */
	if (task->frozen) {
		task_unlock(task);
		return KERN_FAILURE;
	}
	/* Claim the transition; woken waiters above will see this and block. */
	task->changing_freeze_state = TRUE;

	/*
	 * Publish this task in the global freezer context so the
	 * compressor/swap path can attribute its work to this freeze
	 * session.
	 */
	freezer_context_global.freezer_ctx_task = task;

	task_unlock(task);

	/*
	 * Perform (or, if eval_only, merely evaluate) the freeze without
	 * holding the task lock.
	 */
	kr = vm_map_freeze(task,
	    purgeable_count,
	    wired_count,
	    clean_count,
	    dirty_count,
	    dirty_budget,
	    shared_count,
	    freezer_error_code,
	    eval_only);

	task_lock(task);

	/* Only a real (non-evaluation) successful freeze marks the task frozen. */
	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;

		/* This freeze session is done: clear the global freezer context. */
		freezer_context_global.freezer_ctx_task = NULL;
		freezer_context_global.freezer_ctx_uncompressed_pages = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64);         /*used to track pageouts*/
		}

		freezer_context_global.freezer_ctx_swapped_bytes = 0;
	}

	/* Release the transition and wake anyone waiting in the loop above. */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (kr == KERN_SUCCESS) &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kind of "per-task packed c_segs"
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&c_swapout_list_head);
	}

	return kr;
}
4668 
4669 /*
4670  *	task_thaw:
4671  *
4672  *	Thaw a currently frozen task.
4673  *
4674  * Conditions:
4675  *      The caller holds a reference to the task
4676  */
4677 kern_return_t
task_thaw(task_t task)4678 task_thaw(
4679 	task_t          task)
4680 {
4681 	if (task == TASK_NULL || task == kernel_task) {
4682 		return KERN_INVALID_ARGUMENT;
4683 	}
4684 
4685 	task_lock(task);
4686 
4687 	while (task->changing_freeze_state) {
4688 		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4689 		task_unlock(task);
4690 		thread_block(THREAD_CONTINUE_NULL);
4691 
4692 		task_lock(task);
4693 	}
4694 	if (!task->frozen) {
4695 		task_unlock(task);
4696 		return KERN_FAILURE;
4697 	}
4698 	task->frozen = FALSE;
4699 
4700 	task_unlock(task);
4701 
4702 	return KERN_SUCCESS;
4703 }
4704 
4705 void
task_update_frozen_to_swap_acct(task_t task,int64_t amount,freezer_acct_op_t op)4706 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
4707 {
4708 	/*
4709 	 * We don't assert that the task lock is held because we call this
4710 	 * routine from the decompression path and we won't be holding the
4711 	 * task lock. However, since we are in the context of the task we are
4712 	 * safe.
4713 	 * In the case of the task_freeze path, we call it from behind the task
4714 	 * lock but we don't need to because we have a reference on the proc
4715 	 * being frozen.
4716 	 */
4717 
4718 	assert(task);
4719 	if (amount == 0) {
4720 		return;
4721 	}
4722 
4723 	if (op == CREDIT_TO_SWAP) {
4724 		ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4725 	} else if (op == DEBIT_FROM_SWAP) {
4726 		ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4727 	} else {
4728 		panic("task_update_frozen_to_swap_acct: Invalid ledger op");
4729 	}
4730 }
4731 #endif /* CONFIG_FREEZE */
4732 
4733 kern_return_t
task_set_security_tokens(task_t task,security_token_t sec_token,audit_token_t audit_token,host_priv_t host_priv)4734 task_set_security_tokens(
4735 	task_t           task,
4736 	security_token_t sec_token,
4737 	audit_token_t    audit_token,
4738 	host_priv_t      host_priv)
4739 {
4740 	ipc_port_t       host_port;
4741 	kern_return_t    kr;
4742 
4743 	if (task == TASK_NULL) {
4744 		return KERN_INVALID_ARGUMENT;
4745 	}
4746 
4747 	task_lock(task);
4748 	task_set_tokens(task, &sec_token, &audit_token);
4749 	task_unlock(task);
4750 
4751 	if (host_priv != HOST_PRIV_NULL) {
4752 		kr = host_get_host_priv_port(host_priv, &host_port);
4753 	} else {
4754 		kr = host_get_host_port(host_priv_self(), &host_port);
4755 	}
4756 	assert(kr == KERN_SUCCESS);
4757 
4758 	kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4759 	return kr;
4760 }
4761 
/*
 * task_send_trace_memory:
 *
 * Unsupported entry point: all arguments are ignored and the call
 * always fails with KERN_INVALID_ARGUMENT.
 */
kern_return_t
task_send_trace_memory(
	__unused task_t   target_task,
	__unused uint32_t pid,
	__unused uint64_t uniqueid)
{
	return KERN_INVALID_ARGUMENT;
}
4770 
4771 /*
4772  * This routine was added, pretty much exclusively, for registering the
4773  * RPC glue vector for in-kernel short circuited tasks.  Rather than
4774  * removing it completely, I have only disabled that feature (which was
4775  * the only feature at the time).  It just appears that we are going to
4776  * want to add some user data to tasks in the future (i.e. bsd info,
4777  * task names, etc...), so I left it in the formal task interface.
4778  */
4779 kern_return_t
task_set_info(task_t task,task_flavor_t flavor,__unused task_info_t task_info_in,__unused mach_msg_type_number_t task_info_count)4780 task_set_info(
4781 	task_t          task,
4782 	task_flavor_t   flavor,
4783 	__unused task_info_t    task_info_in,           /* pointer to IN array */
4784 	__unused mach_msg_type_number_t task_info_count)
4785 {
4786 	if (task == TASK_NULL) {
4787 		return KERN_INVALID_ARGUMENT;
4788 	}
4789 	switch (flavor) {
4790 #if CONFIG_ATM
4791 	case TASK_TRACE_MEMORY_INFO:
4792 		return KERN_NOT_SUPPORTED;
4793 #endif // CONFIG_ATM
4794 	default:
4795 		return KERN_INVALID_ARGUMENT;
4796 	}
4797 }
4798 
/*
 * When non-zero, TASK_VM_INFO_PURGEABLE subtracts the volatile
 * compressed pmap size from the reported "compressed" total in
 * task_info() (see the vm_map_query_volatile handling there).
 */
int radar_20146450 = 1;
4800 kern_return_t
task_info(task_t task,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)4801 task_info(
4802 	task_t                  task,
4803 	task_flavor_t           flavor,
4804 	task_info_t             task_info_out,
4805 	mach_msg_type_number_t  *task_info_count)
4806 {
4807 	kern_return_t error = KERN_SUCCESS;
4808 	mach_msg_type_number_t  original_task_info_count;
4809 	bool is_kernel_task = (task == kernel_task);
4810 
4811 	if (task == TASK_NULL) {
4812 		return KERN_INVALID_ARGUMENT;
4813 	}
4814 
4815 	original_task_info_count = *task_info_count;
4816 	task_lock(task);
4817 
4818 	if ((task != current_task()) && (!task->active)) {
4819 		task_unlock(task);
4820 		return KERN_INVALID_ARGUMENT;
4821 	}
4822 
4823 
4824 	switch (flavor) {
4825 	case TASK_BASIC_INFO_32:
4826 	case TASK_BASIC2_INFO_32:
4827 #if defined(__arm__) || defined(__arm64__)
4828 	case TASK_BASIC_INFO_64:
4829 #endif
4830 		{
4831 			task_basic_info_32_t    basic_info;
4832 			vm_map_t                                map;
4833 			clock_sec_t                             secs;
4834 			clock_usec_t                    usecs;
4835 			ledger_amount_t tmp;
4836 
4837 			if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4838 				error = KERN_INVALID_ARGUMENT;
4839 				break;
4840 			}
4841 
4842 			basic_info = (task_basic_info_32_t)task_info_out;
4843 
4844 			map = (task == kernel_task)? kernel_map: task->map;
4845 			basic_info->virtual_size = (typeof(basic_info->virtual_size))vm_map_adjusted_size(map);
4846 			if (flavor == TASK_BASIC2_INFO_32) {
4847 				/*
4848 				 * The "BASIC2" flavor gets the maximum resident
4849 				 * size instead of the current resident size...
4850 				 */
4851 				ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
4852 			} else {
4853 				ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
4854 			}
4855 			basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
4856 
4857 			basic_info->policy = ((task != kernel_task)?
4858 			    POLICY_TIMESHARE: POLICY_RR);
4859 			basic_info->suspend_count = task->user_stop_count;
4860 
4861 			absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4862 			basic_info->user_time.seconds =
4863 			    (typeof(basic_info->user_time.seconds))secs;
4864 			basic_info->user_time.microseconds = usecs;
4865 
4866 			absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4867 			basic_info->system_time.seconds =
4868 			    (typeof(basic_info->system_time.seconds))secs;
4869 			basic_info->system_time.microseconds = usecs;
4870 
4871 			*task_info_count = TASK_BASIC_INFO_32_COUNT;
4872 			break;
4873 		}
4874 
4875 #if defined(__arm__) || defined(__arm64__)
4876 	case TASK_BASIC_INFO_64_2:
4877 	{
4878 		task_basic_info_64_2_t  basic_info;
4879 		vm_map_t                                map;
4880 		clock_sec_t                             secs;
4881 		clock_usec_t                    usecs;
4882 
4883 		if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4884 			error = KERN_INVALID_ARGUMENT;
4885 			break;
4886 		}
4887 
4888 		basic_info = (task_basic_info_64_2_t)task_info_out;
4889 
4890 		map = (task == kernel_task)? kernel_map: task->map;
4891 		basic_info->virtual_size  = vm_map_adjusted_size(map);
4892 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
4893 
4894 		basic_info->policy = ((task != kernel_task)?
4895 		    POLICY_TIMESHARE: POLICY_RR);
4896 		basic_info->suspend_count = task->user_stop_count;
4897 
4898 		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4899 		basic_info->user_time.seconds =
4900 		    (typeof(basic_info->user_time.seconds))secs;
4901 		basic_info->user_time.microseconds = usecs;
4902 
4903 		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4904 		basic_info->system_time.seconds =
4905 		    (typeof(basic_info->system_time.seconds))secs;
4906 		basic_info->system_time.microseconds = usecs;
4907 
4908 		*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4909 		break;
4910 	}
4911 
4912 #else /* defined(__arm__) || defined(__arm64__) */
4913 	case TASK_BASIC_INFO_64:
4914 	{
4915 		task_basic_info_64_t    basic_info;
4916 		vm_map_t                                map;
4917 		clock_sec_t                             secs;
4918 		clock_usec_t                    usecs;
4919 
4920 		if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4921 			error = KERN_INVALID_ARGUMENT;
4922 			break;
4923 		}
4924 
4925 		basic_info = (task_basic_info_64_t)task_info_out;
4926 
4927 		map = (task == kernel_task)? kernel_map: task->map;
4928 		basic_info->virtual_size  = vm_map_adjusted_size(map);
4929 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
4930 
4931 		basic_info->policy = ((task != kernel_task)?
4932 		    POLICY_TIMESHARE: POLICY_RR);
4933 		basic_info->suspend_count = task->user_stop_count;
4934 
4935 		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4936 		basic_info->user_time.seconds =
4937 		    (typeof(basic_info->user_time.seconds))secs;
4938 		basic_info->user_time.microseconds = usecs;
4939 
4940 		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4941 		basic_info->system_time.seconds =
4942 		    (typeof(basic_info->system_time.seconds))secs;
4943 		basic_info->system_time.microseconds = usecs;
4944 
4945 		*task_info_count = TASK_BASIC_INFO_64_COUNT;
4946 		break;
4947 	}
4948 #endif /* defined(__arm__) || defined(__arm64__) */
4949 
4950 	case MACH_TASK_BASIC_INFO:
4951 	{
4952 		mach_task_basic_info_t  basic_info;
4953 		vm_map_t                map;
4954 		clock_sec_t             secs;
4955 		clock_usec_t            usecs;
4956 
4957 		if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4958 			error = KERN_INVALID_ARGUMENT;
4959 			break;
4960 		}
4961 
4962 		basic_info = (mach_task_basic_info_t)task_info_out;
4963 
4964 		map = (task == kernel_task) ? kernel_map : task->map;
4965 
4966 		basic_info->virtual_size  = vm_map_adjusted_size(map);
4967 
4968 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
4969 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
4970 
4971 		basic_info->policy = ((task != kernel_task) ?
4972 		    POLICY_TIMESHARE : POLICY_RR);
4973 
4974 		basic_info->suspend_count = task->user_stop_count;
4975 
4976 		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4977 		basic_info->user_time.seconds =
4978 		    (typeof(basic_info->user_time.seconds))secs;
4979 		basic_info->user_time.microseconds = usecs;
4980 
4981 		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4982 		basic_info->system_time.seconds =
4983 		    (typeof(basic_info->system_time.seconds))secs;
4984 		basic_info->system_time.microseconds = usecs;
4985 
4986 		*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4987 		break;
4988 	}
4989 
4990 	case TASK_THREAD_TIMES_INFO:
4991 	{
4992 		task_thread_times_info_t        times_info;
4993 		thread_t                                        thread;
4994 
4995 		if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4996 			error = KERN_INVALID_ARGUMENT;
4997 			break;
4998 		}
4999 
5000 		times_info = (task_thread_times_info_t) task_info_out;
5001 		times_info->user_time.seconds = 0;
5002 		times_info->user_time.microseconds = 0;
5003 		times_info->system_time.seconds = 0;
5004 		times_info->system_time.microseconds = 0;
5005 
5006 
5007 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5008 			time_value_t    user_time, system_time;
5009 
5010 			if (thread->options & TH_OPT_IDLE_THREAD) {
5011 				continue;
5012 			}
5013 
5014 			thread_read_times(thread, &user_time, &system_time, NULL);
5015 
5016 			time_value_add(&times_info->user_time, &user_time);
5017 			time_value_add(&times_info->system_time, &system_time);
5018 		}
5019 
5020 		*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5021 		break;
5022 	}
5023 
5024 	case TASK_ABSOLUTETIME_INFO:
5025 	{
5026 		task_absolutetime_info_t        info;
5027 		thread_t                        thread;
5028 
5029 		if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5030 			error = KERN_INVALID_ARGUMENT;
5031 			break;
5032 		}
5033 
5034 		info = (task_absolutetime_info_t)task_info_out;
5035 		info->threads_user = info->threads_system = 0;
5036 
5037 
5038 		info->total_user = task->total_user_time;
5039 		info->total_system = task->total_system_time;
5040 
5041 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5042 			uint64_t        tval;
5043 			spl_t           x;
5044 
5045 			if (thread->options & TH_OPT_IDLE_THREAD) {
5046 				continue;
5047 			}
5048 
5049 			x = splsched();
5050 			thread_lock(thread);
5051 
5052 			tval = timer_grab(&thread->user_timer);
5053 			info->threads_user += tval;
5054 			info->total_user += tval;
5055 
5056 			tval = timer_grab(&thread->system_timer);
5057 			if (thread->precise_user_kernel_time) {
5058 				info->threads_system += tval;
5059 				info->total_system += tval;
5060 			} else {
5061 				/* system_timer may represent either sys or user */
5062 				info->threads_user += tval;
5063 				info->total_user += tval;
5064 			}
5065 
5066 			thread_unlock(thread);
5067 			splx(x);
5068 		}
5069 
5070 
5071 		*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5072 		break;
5073 	}
5074 
5075 	case TASK_DYLD_INFO:
5076 	{
5077 		task_dyld_info_t info;
5078 
5079 		/*
5080 		 * We added the format field to TASK_DYLD_INFO output.  For
5081 		 * temporary backward compatibility, accept the fact that
5082 		 * clients may ask for the old version - distinquished by the
5083 		 * size of the expected result structure.
5084 		 */
5085 #define TASK_LEGACY_DYLD_INFO_COUNT \
5086 	        offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5087 
5088 		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5089 			error = KERN_INVALID_ARGUMENT;
5090 			break;
5091 		}
5092 
5093 		info = (task_dyld_info_t)task_info_out;
5094 		info->all_image_info_addr = task->all_image_info_addr;
5095 		info->all_image_info_size = task->all_image_info_size;
5096 
5097 		/* only set format on output for those expecting it */
5098 		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5099 			info->all_image_info_format = task_has_64Bit_addr(task) ?
5100 			    TASK_DYLD_ALL_IMAGE_INFO_64 :
5101 			    TASK_DYLD_ALL_IMAGE_INFO_32;
5102 			*task_info_count = TASK_DYLD_INFO_COUNT;
5103 		} else {
5104 			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5105 		}
5106 		break;
5107 	}
5108 
5109 	case TASK_EXTMOD_INFO:
5110 	{
5111 		task_extmod_info_t info;
5112 		void *p;
5113 
5114 		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5115 			error = KERN_INVALID_ARGUMENT;
5116 			break;
5117 		}
5118 
5119 		info = (task_extmod_info_t)task_info_out;
5120 
5121 		p = get_bsdtask_info(task);
5122 		if (p) {
5123 			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5124 		} else {
5125 			bzero(info->task_uuid, sizeof(info->task_uuid));
5126 		}
5127 		info->extmod_statistics = task->extmod_statistics;
5128 		*task_info_count = TASK_EXTMOD_INFO_COUNT;
5129 
5130 		break;
5131 	}
5132 
5133 	case TASK_KERNELMEMORY_INFO:
5134 	{
5135 		task_kernelmemory_info_t        tkm_info;
5136 		ledger_amount_t                 credit, debit;
5137 
5138 		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5139 			error = KERN_INVALID_ARGUMENT;
5140 			break;
5141 		}
5142 
5143 		tkm_info = (task_kernelmemory_info_t) task_info_out;
5144 		tkm_info->total_palloc = 0;
5145 		tkm_info->total_pfree = 0;
5146 		tkm_info->total_salloc = 0;
5147 		tkm_info->total_sfree = 0;
5148 
5149 		if (task == kernel_task) {
5150 			/*
5151 			 * All shared allocs/frees from other tasks count against
5152 			 * the kernel private memory usage.  If we are looking up
5153 			 * info for the kernel task, gather from everywhere.
5154 			 */
5155 			task_unlock(task);
5156 
5157 			/* start by accounting for all the terminated tasks against the kernel */
5158 			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5159 			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5160 
5161 			/* count all other task/thread shared alloc/free against the kernel */
5162 			lck_mtx_lock(&tasks_threads_lock);
5163 
5164 			/* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5165 			queue_iterate(&tasks, task, task_t, tasks) {
5166 				if (task == kernel_task) {
5167 					if (ledger_get_entries(task->ledger,
5168 					    task_ledgers.tkm_private, &credit,
5169 					    &debit) == KERN_SUCCESS) {
5170 						tkm_info->total_palloc += credit;
5171 						tkm_info->total_pfree += debit;
5172 					}
5173 				}
5174 				if (!ledger_get_entries(task->ledger,
5175 				    task_ledgers.tkm_shared, &credit, &debit)) {
5176 					tkm_info->total_palloc += credit;
5177 					tkm_info->total_pfree += debit;
5178 				}
5179 			}
5180 			lck_mtx_unlock(&tasks_threads_lock);
5181 		} else {
5182 			if (!ledger_get_entries(task->ledger,
5183 			    task_ledgers.tkm_private, &credit, &debit)) {
5184 				tkm_info->total_palloc = credit;
5185 				tkm_info->total_pfree = debit;
5186 			}
5187 			if (!ledger_get_entries(task->ledger,
5188 			    task_ledgers.tkm_shared, &credit, &debit)) {
5189 				tkm_info->total_salloc = credit;
5190 				tkm_info->total_sfree = debit;
5191 			}
5192 			task_unlock(task);
5193 		}
5194 
5195 		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5196 		return KERN_SUCCESS;
5197 	}
5198 
5199 	/* OBSOLETE */
5200 	case TASK_SCHED_FIFO_INFO:
5201 	{
5202 		if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5203 			error = KERN_INVALID_ARGUMENT;
5204 			break;
5205 		}
5206 
5207 		error = KERN_INVALID_POLICY;
5208 		break;
5209 	}
5210 
5211 	/* OBSOLETE */
5212 	case TASK_SCHED_RR_INFO:
5213 	{
5214 		policy_rr_base_t        rr_base;
5215 		uint32_t quantum_time;
5216 		uint64_t quantum_ns;
5217 
5218 		if (*task_info_count < POLICY_RR_BASE_COUNT) {
5219 			error = KERN_INVALID_ARGUMENT;
5220 			break;
5221 		}
5222 
5223 		rr_base = (policy_rr_base_t) task_info_out;
5224 
5225 		if (task != kernel_task) {
5226 			error = KERN_INVALID_POLICY;
5227 			break;
5228 		}
5229 
5230 		rr_base->base_priority = task->priority;
5231 
5232 		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5233 		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5234 
5235 		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5236 
5237 		*task_info_count = POLICY_RR_BASE_COUNT;
5238 		break;
5239 	}
5240 
5241 	/* OBSOLETE */
5242 	case TASK_SCHED_TIMESHARE_INFO:
5243 	{
5244 		policy_timeshare_base_t ts_base;
5245 
5246 		if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5247 			error = KERN_INVALID_ARGUMENT;
5248 			break;
5249 		}
5250 
5251 		ts_base = (policy_timeshare_base_t) task_info_out;
5252 
5253 		if (task == kernel_task) {
5254 			error = KERN_INVALID_POLICY;
5255 			break;
5256 		}
5257 
5258 		ts_base->base_priority = task->priority;
5259 
5260 		*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5261 		break;
5262 	}
5263 
5264 	case TASK_SECURITY_TOKEN:
5265 	{
5266 		security_token_t        *sec_token_p;
5267 
5268 		if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5269 			error = KERN_INVALID_ARGUMENT;
5270 			break;
5271 		}
5272 
5273 		sec_token_p = (security_token_t *) task_info_out;
5274 
5275 		*sec_token_p = *task_get_sec_token(task);
5276 
5277 		*task_info_count = TASK_SECURITY_TOKEN_COUNT;
5278 		break;
5279 	}
5280 
5281 	case TASK_AUDIT_TOKEN:
5282 	{
5283 		audit_token_t   *audit_token_p;
5284 
5285 		if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5286 			error = KERN_INVALID_ARGUMENT;
5287 			break;
5288 		}
5289 
5290 		audit_token_p = (audit_token_t *) task_info_out;
5291 
5292 		*audit_token_p = *task_get_audit_token(task);
5293 
5294 		*task_info_count = TASK_AUDIT_TOKEN_COUNT;
5295 		break;
5296 	}
5297 
5298 	case TASK_SCHED_INFO:
5299 		error = KERN_INVALID_ARGUMENT;
5300 		break;
5301 
5302 	case TASK_EVENTS_INFO:
5303 	{
5304 		task_events_info_t      events_info;
5305 		thread_t                thread;
5306 		uint64_t                n_syscalls_mach, n_syscalls_unix, n_csw;
5307 
5308 		if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5309 			error = KERN_INVALID_ARGUMENT;
5310 			break;
5311 		}
5312 
5313 		events_info = (task_events_info_t) task_info_out;
5314 
5315 
5316 		events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5317 		events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5318 		events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5319 		events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5320 		events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5321 
5322 		n_syscalls_mach = task->syscalls_mach;
5323 		n_syscalls_unix = task->syscalls_unix;
5324 		n_csw = task->c_switch;
5325 
5326 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
5327 			n_csw           += thread->c_switch;
5328 			n_syscalls_mach += thread->syscalls_mach;
5329 			n_syscalls_unix += thread->syscalls_unix;
5330 		}
5331 
5332 		events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5333 		events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5334 		events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5335 
5336 		*task_info_count = TASK_EVENTS_INFO_COUNT;
5337 		break;
5338 	}
5339 	case TASK_AFFINITY_TAG_INFO:
5340 	{
5341 		if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5342 			error = KERN_INVALID_ARGUMENT;
5343 			break;
5344 		}
5345 
5346 		error = task_affinity_info(task, task_info_out, task_info_count);
5347 		break;
5348 	}
5349 	case TASK_POWER_INFO:
5350 	{
5351 		if (*task_info_count < TASK_POWER_INFO_COUNT) {
5352 			error = KERN_INVALID_ARGUMENT;
5353 			break;
5354 		}
5355 
5356 		task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5357 		break;
5358 	}
5359 
5360 	case TASK_POWER_INFO_V2:
5361 	{
5362 		if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5363 			error = KERN_INVALID_ARGUMENT;
5364 			break;
5365 		}
5366 		task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5367 		task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5368 		break;
5369 	}
5370 
5371 	case TASK_VM_INFO:
5372 	case TASK_VM_INFO_PURGEABLE:
5373 	{
5374 		task_vm_info_t          vm_info;
5375 		vm_map_t                map;
5376 		ledger_amount_t         tmp_amount;
5377 
5378 #if __arm64__
5379 		struct proc *p;
5380 		uint32_t platform, sdk;
5381 		p = current_proc();
5382 		platform = proc_platform(p);
5383 		sdk = proc_min_sdk(p);
5384 		if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5385 		    platform == PLATFORM_IOS &&
5386 		    sdk != 0 &&
5387 		    (sdk >> 16) <= 12) {
5388 			/*
5389 			 * Some iOS apps pass an incorrect value for
5390 			 * task_info_count, expressed in number of bytes
5391 			 * instead of number of "natural_t" elements.
5392 			 * For the sake of backwards binary compatibility
5393 			 * for apps built with an iOS12 or older SDK and using
5394 			 * the "rev2" data structure, let's fix task_info_count
5395 			 * for them, to avoid stomping past the actual end
5396 			 * of their buffer.
5397 			 */
5398 #if DEVELOPMENT || DEBUG
5399 			printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5400 #endif /* DEVELOPMENT || DEBUG */
5401 			DTRACE_VM4(workaround_task_vm_info_count,
5402 			    mach_msg_type_number_t, original_task_info_count,
5403 			    mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5404 			    uint32_t, platform,
5405 			    uint32_t, sdk);
5406 			original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5407 			*task_info_count = original_task_info_count;
5408 		}
5409 #endif /* __arm64__ */
5410 
5411 		if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5412 			error = KERN_INVALID_ARGUMENT;
5413 			break;
5414 		}
5415 
5416 		vm_info = (task_vm_info_t)task_info_out;
5417 
5418 		/*
5419 		 * Do not hold both the task and map locks,
5420 		 * so convert the task lock into a map reference,
5421 		 * drop the task lock, then lock the map.
5422 		 */
5423 		if (is_kernel_task) {
5424 			map = kernel_map;
5425 			task_unlock(task);
5426 			/* no lock, no reference */
5427 		} else {
5428 			map = task->map;
5429 			vm_map_reference(map);
5430 			task_unlock(task);
5431 			vm_map_lock_read(map);
5432 		}
5433 
5434 		vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5435 		vm_info->region_count = map->hdr.nentries;
5436 		vm_info->page_size = vm_map_page_size(map);
5437 
5438 		ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5439 		ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5440 
5441 		vm_info->device = 0;
5442 		vm_info->device_peak = 0;
5443 		ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5444 		ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5445 		ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5446 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5447 		ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5448 		ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5449 		ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5450 		ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5451 		ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5452 
5453 		vm_info->purgeable_volatile_pmap = 0;
5454 		vm_info->purgeable_volatile_resident = 0;
5455 		vm_info->purgeable_volatile_virtual = 0;
5456 		if (is_kernel_task) {
5457 			/*
5458 			 * We do not maintain the detailed stats for the
5459 			 * kernel_pmap, so just count everything as
5460 			 * "internal"...
5461 			 */
5462 			vm_info->internal = vm_info->resident_size;
5463 			/*
5464 			 * ... but since the memory held by the VM compressor
5465 			 * in the kernel address space ought to be attributed
5466 			 * to user-space tasks, we subtract it from "internal"
5467 			 * to give memory reporting tools a more accurate idea
5468 			 * of what the kernel itself is actually using, instead
5469 			 * of making it look like the kernel is leaking memory
5470 			 * when the system is under memory pressure.
5471 			 */
5472 			vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5473 			    PAGE_SIZE);
5474 		} else {
5475 			mach_vm_size_t  volatile_virtual_size;
5476 			mach_vm_size_t  volatile_resident_size;
5477 			mach_vm_size_t  volatile_compressed_size;
5478 			mach_vm_size_t  volatile_pmap_size;
5479 			mach_vm_size_t  volatile_compressed_pmap_size;
5480 			kern_return_t   kr;
5481 
5482 			if (flavor == TASK_VM_INFO_PURGEABLE) {
5483 				kr = vm_map_query_volatile(
5484 					map,
5485 					&volatile_virtual_size,
5486 					&volatile_resident_size,
5487 					&volatile_compressed_size,
5488 					&volatile_pmap_size,
5489 					&volatile_compressed_pmap_size);
5490 				if (kr == KERN_SUCCESS) {
5491 					vm_info->purgeable_volatile_pmap =
5492 					    volatile_pmap_size;
5493 					if (radar_20146450) {
5494 						vm_info->compressed -=
5495 						    volatile_compressed_pmap_size;
5496 					}
5497 					vm_info->purgeable_volatile_resident =
5498 					    volatile_resident_size;
5499 					vm_info->purgeable_volatile_virtual =
5500 					    volatile_virtual_size;
5501 				}
5502 			}
5503 		}
5504 		*task_info_count = TASK_VM_INFO_REV0_COUNT;
5505 
5506 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5507 			/* must be captured while we still have the map lock */
5508 			vm_info->min_address = map->min_offset;
5509 			vm_info->max_address = map->max_offset;
5510 		}
5511 
5512 		/*
5513 		 * Done with vm map things, can drop the map lock and reference,
5514 		 * and take the task lock back.
5515 		 *
5516 		 * Re-validate that the task didn't die on us.
5517 		 */
5518 		if (!is_kernel_task) {
5519 			vm_map_unlock_read(map);
5520 			vm_map_deallocate(map);
5521 		}
5522 		map = VM_MAP_NULL;
5523 
5524 		task_lock(task);
5525 
5526 		if ((task != current_task()) && (!task->active)) {
5527 			error = KERN_INVALID_ARGUMENT;
5528 			break;
5529 		}
5530 
5531 		if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5532 			vm_info->phys_footprint =
5533 			    (mach_vm_size_t) get_task_phys_footprint(task);
5534 			*task_info_count = TASK_VM_INFO_REV1_COUNT;
5535 		}
5536 		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5537 			/* data was captured above */
5538 			*task_info_count = TASK_VM_INFO_REV2_COUNT;
5539 		}
5540 
5541 		if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5542 			ledger_get_lifetime_max(task->ledger,
5543 			    task_ledgers.phys_footprint,
5544 			    &vm_info->ledger_phys_footprint_peak);
5545 			ledger_get_balance(task->ledger,
5546 			    task_ledgers.purgeable_nonvolatile,
5547 			    &vm_info->ledger_purgeable_nonvolatile);
5548 			ledger_get_balance(task->ledger,
5549 			    task_ledgers.purgeable_nonvolatile_compressed,
5550 			    &vm_info->ledger_purgeable_novolatile_compressed);
5551 			ledger_get_balance(task->ledger,
5552 			    task_ledgers.purgeable_volatile,
5553 			    &vm_info->ledger_purgeable_volatile);
5554 			ledger_get_balance(task->ledger,
5555 			    task_ledgers.purgeable_volatile_compressed,
5556 			    &vm_info->ledger_purgeable_volatile_compressed);
5557 			ledger_get_balance(task->ledger,
5558 			    task_ledgers.network_nonvolatile,
5559 			    &vm_info->ledger_tag_network_nonvolatile);
5560 			ledger_get_balance(task->ledger,
5561 			    task_ledgers.network_nonvolatile_compressed,
5562 			    &vm_info->ledger_tag_network_nonvolatile_compressed);
5563 			ledger_get_balance(task->ledger,
5564 			    task_ledgers.network_volatile,
5565 			    &vm_info->ledger_tag_network_volatile);
5566 			ledger_get_balance(task->ledger,
5567 			    task_ledgers.network_volatile_compressed,
5568 			    &vm_info->ledger_tag_network_volatile_compressed);
5569 			ledger_get_balance(task->ledger,
5570 			    task_ledgers.media_footprint,
5571 			    &vm_info->ledger_tag_media_footprint);
5572 			ledger_get_balance(task->ledger,
5573 			    task_ledgers.media_footprint_compressed,
5574 			    &vm_info->ledger_tag_media_footprint_compressed);
5575 			ledger_get_balance(task->ledger,
5576 			    task_ledgers.media_nofootprint,
5577 			    &vm_info->ledger_tag_media_nofootprint);
5578 			ledger_get_balance(task->ledger,
5579 			    task_ledgers.media_nofootprint_compressed,
5580 			    &vm_info->ledger_tag_media_nofootprint_compressed);
5581 			ledger_get_balance(task->ledger,
5582 			    task_ledgers.graphics_footprint,
5583 			    &vm_info->ledger_tag_graphics_footprint);
5584 			ledger_get_balance(task->ledger,
5585 			    task_ledgers.graphics_footprint_compressed,
5586 			    &vm_info->ledger_tag_graphics_footprint_compressed);
5587 			ledger_get_balance(task->ledger,
5588 			    task_ledgers.graphics_nofootprint,
5589 			    &vm_info->ledger_tag_graphics_nofootprint);
5590 			ledger_get_balance(task->ledger,
5591 			    task_ledgers.graphics_nofootprint_compressed,
5592 			    &vm_info->ledger_tag_graphics_nofootprint_compressed);
5593 			ledger_get_balance(task->ledger,
5594 			    task_ledgers.neural_footprint,
5595 			    &vm_info->ledger_tag_neural_footprint);
5596 			ledger_get_balance(task->ledger,
5597 			    task_ledgers.neural_footprint_compressed,
5598 			    &vm_info->ledger_tag_neural_footprint_compressed);
5599 			ledger_get_balance(task->ledger,
5600 			    task_ledgers.neural_nofootprint,
5601 			    &vm_info->ledger_tag_neural_nofootprint);
5602 			ledger_get_balance(task->ledger,
5603 			    task_ledgers.neural_nofootprint_compressed,
5604 			    &vm_info->ledger_tag_neural_nofootprint_compressed);
5605 			*task_info_count = TASK_VM_INFO_REV3_COUNT;
5606 		}
5607 		if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5608 			if (task->bsd_info) {
5609 				vm_info->limit_bytes_remaining =
5610 				    memorystatus_available_memory_internal(task->bsd_info);
5611 			} else {
5612 				vm_info->limit_bytes_remaining = 0;
5613 			}
5614 			*task_info_count = TASK_VM_INFO_REV4_COUNT;
5615 		}
5616 		if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5617 			thread_t thread;
5618 			uint64_t total = task->decompressions;
5619 			queue_iterate(&task->threads, thread, thread_t, task_threads) {
5620 				total += thread->decompressions;
5621 			}
5622 			vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
5623 			*task_info_count = TASK_VM_INFO_REV5_COUNT;
5624 		}
5625 		if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
5626 			ledger_get_balance(task->ledger, task_ledgers.swapins,
5627 			    &vm_info->ledger_swapins);
5628 			*task_info_count = TASK_VM_INFO_REV6_COUNT;
5629 		}
5630 
5631 		break;
5632 	}
5633 
5634 	case TASK_WAIT_STATE_INFO:
5635 	{
5636 		/*
5637 		 * Deprecated flavor. Currently allowing some results until all users
5638 		 * stop calling it. The results may not be accurate.
5639 		 */
5640 		task_wait_state_info_t  wait_state_info;
5641 		uint64_t total_sfi_ledger_val = 0;
5642 
5643 		if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5644 			error = KERN_INVALID_ARGUMENT;
5645 			break;
5646 		}
5647 
5648 		wait_state_info = (task_wait_state_info_t) task_info_out;
5649 
5650 		wait_state_info->total_wait_state_time = 0;
5651 		bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5652 
5653 #if CONFIG_SCHED_SFI
5654 		int i, prev_lentry = -1;
5655 		int64_t  val_credit, val_debit;
5656 
5657 		for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5658 			val_credit = 0;
5659 			/*
5660 			 * checking with prev_lentry != entry ensures adjacent classes
5661 			 * which share the same ledger do not add wait times twice.
5662 			 * Note: Use ledger() call to get data for each individual sfi class.
5663 			 */
5664 			if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5665 			    KERN_SUCCESS == ledger_get_entries(task->ledger,
5666 			    task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5667 				total_sfi_ledger_val += val_credit;
5668 			}
5669 			prev_lentry = task_ledgers.sfi_wait_times[i];
5670 		}
5671 
5672 #endif /* CONFIG_SCHED_SFI */
5673 		wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5674 		*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5675 
5676 		break;
5677 	}
5678 	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5679 	{
5680 #if DEVELOPMENT || DEBUG
5681 		pvm_account_info_t      acnt_info;
5682 
5683 		if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5684 			error = KERN_INVALID_ARGUMENT;
5685 			break;
5686 		}
5687 
5688 		if (task_info_out == NULL) {
5689 			error = KERN_INVALID_ARGUMENT;
5690 			break;
5691 		}
5692 
5693 		acnt_info = (pvm_account_info_t) task_info_out;
5694 
5695 		error = vm_purgeable_account(task, acnt_info);
5696 
5697 		*task_info_count = PVM_ACCOUNT_INFO_COUNT;
5698 
5699 		break;
5700 #else /* DEVELOPMENT || DEBUG */
5701 		error = KERN_NOT_SUPPORTED;
5702 		break;
5703 #endif /* DEVELOPMENT || DEBUG */
5704 	}
5705 	case TASK_FLAGS_INFO:
5706 	{
5707 		task_flags_info_t               flags_info;
5708 
5709 		if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5710 			error = KERN_INVALID_ARGUMENT;
5711 			break;
5712 		}
5713 
5714 		flags_info = (task_flags_info_t)task_info_out;
5715 
5716 		/* only publish the 64-bit flag of the task */
5717 		flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5718 
5719 		*task_info_count = TASK_FLAGS_INFO_COUNT;
5720 		break;
5721 	}
5722 
5723 	case TASK_DEBUG_INFO_INTERNAL:
5724 	{
5725 #if DEVELOPMENT || DEBUG
5726 		task_debug_info_internal_t dbg_info;
5727 		ipc_space_t space = task->itk_space;
5728 		if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5729 			error = KERN_NOT_SUPPORTED;
5730 			break;
5731 		}
5732 
5733 		if (task_info_out == NULL) {
5734 			error = KERN_INVALID_ARGUMENT;
5735 			break;
5736 		}
5737 		dbg_info = (task_debug_info_internal_t) task_info_out;
5738 		dbg_info->ipc_space_size = 0;
5739 
5740 		if (space) {
5741 #if MACH_LOCKFREE_SPACE
5742 			hazard_guard_t guard = hazard_guard_get(0);
5743 			ipc_entry_t table = hazard_guard_acquire(guard, &space->is_table);
5744 			if (table) {
5745 				dbg_info->ipc_space_size = table->ie_size;
5746 			}
5747 			hazard_guard_put(guard);
5748 #else
5749 			is_read_lock(space);
5750 			if (is_active(space)) {
5751 				dbg_info->ipc_space_size =
5752 				    is_active_table(space)->ie_size;
5753 			}
5754 			is_read_unlock(space);
5755 #endif
5756 		}
5757 
5758 		dbg_info->suspend_count = task->suspend_count;
5759 
5760 		error = KERN_SUCCESS;
5761 		*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5762 		break;
5763 #else /* DEVELOPMENT || DEBUG */
5764 		error = KERN_NOT_SUPPORTED;
5765 		break;
5766 #endif /* DEVELOPMENT || DEBUG */
5767 	}
5768 	default:
5769 		error = KERN_INVALID_ARGUMENT;
5770 	}
5771 
5772 	task_unlock(task);
5773 	return error;
5774 }
5775 
5776 /*
5777  * task_info_from_user
5778  *
5779  * When calling task_info from user space,
5780  * this function will be executed as mig server side
5781  * instead of calling directly into task_info.
5782  * This gives the possibility to perform more security
5783  * checks on task_port.
5784  *
5785  * In the case of TASK_DYLD_INFO, we require the more
5786  * privileged task_read_port not the less-privileged task_name_port.
5787  *
5788  */
5789 kern_return_t
task_info_from_user(mach_port_t task_port,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)5790 task_info_from_user(
5791 	mach_port_t             task_port,
5792 	task_flavor_t           flavor,
5793 	task_info_t             task_info_out,
5794 	mach_msg_type_number_t  *task_info_count)
5795 {
5796 	task_t task;
5797 	kern_return_t ret;
5798 
5799 	if (flavor == TASK_DYLD_INFO) {
5800 		task = convert_port_to_task_read(task_port);
5801 	} else {
5802 		task = convert_port_to_task_name(task_port);
5803 	}
5804 
5805 	ret = task_info(task, flavor, task_info_out, task_info_count);
5806 
5807 	task_deallocate(task);
5808 
5809 	return ret;
5810 }
5811 
5812 /*
5813  * Routine: task_dyld_process_info_update_helper
5814  *
5815  * Release send rights in release_ports.
5816  *
5817  * If no active ports found in task's dyld notifier array, unset the magic value
5818  * in user space to indicate so.
5819  *
5820  * Condition:
5821  *      task's itk_lock is locked, and is unlocked upon return.
5822  *      Global g_dyldinfo_mtx is locked, and is unlocked upon return.
5823  */
void
task_dyld_process_info_update_helper(
	task_t                  task,
	size_t                  active_count,
	vm_map_address_t        magic_addr,    /* a userspace address */
	ipc_port_t             *release_ports,
	size_t                  release_count)
{
	void *notifiers_ptr = NULL;

	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);

	if (active_count == 0) {
		/*
		 * No active notifier ports remain: reclaim the whole array
		 * and clear the userspace magic so dyld knows nothing is
		 * registered. The array pointer is detached before dropping
		 * itk_lock so no one can observe a freed array.
		 */
		assert(task->itk_dyld_notify != NULL);
		notifiers_ptr = task->itk_dyld_notify;
		task->itk_dyld_notify = NULL;
		itk_unlock(task);

		kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
		/* best-effort write; failure leaves stale magic in userspace */
		(void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
	} else {
		itk_unlock(task);
		(void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
		    magic_addr);     /* reset magic */
	}

	/* Both locks taken by the caller are released before the send rights
	 * are dropped, since ipc_port_release_send may take other locks. */
	lck_mtx_unlock(&g_dyldinfo_mtx);

	for (size_t i = 0; i < release_count; i++) {
		ipc_port_release_send(release_ports[i]);
	}
}
5856 
5857 /*
5858  * Routine: task_dyld_process_info_notify_register
5859  *
5860  * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
5861  * memory for the array if it's the first port to be registered. Also cleanup
5862  * any dead rights found in the array.
5863  *
5864  * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
5865  *
5866  * Args:
5867  *     task:   Target task for the registration.
5868  *     sright: A send right.
5869  *
5870  * Returns:
5871  *     KERN_SUCCESS: Registration succeeded.
5872  *     KERN_INVALID_TASK: task is invalid.
5873  *     KERN_INVALID_RIGHT: sright is invalid.
5874  *     KERN_DENIED: Security policy denied this call.
5875  *     KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
5876  *     KERN_NO_SPACE: No available notifier port slot left for this task.
5877  *     KERN_RIGHT_EXISTS: The notifier port is already registered and active.
5878  *
5879  *     Other error code see task_info().
5880  *
5881  * See Also:
5882  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
5883  */
kern_return_t
task_dyld_process_info_notify_register(
	task_t                  task,
	ipc_port_t              sright)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	mach_vm_address_t ports_addr; /* a user space address */
	kern_return_t kr;
	boolean_t right_exists = false;
	ipc_port_t *notifiers_ptr = NULL;
	ipc_port_t *portp;

	/* The kernel task has no dyld notifier array. */
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!IP_VALID(sright)) {
		return KERN_INVALID_RIGHT;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/*
	 * Locate the notifyMachPorts field inside the target's
	 * dyld_all_image_infos structure; the layout depends on whether
	 * the target uses the 32-bit or 64-bit dyld image-info format.
	 */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/*
	 * Preallocate the notifier array before taking any locks
	 * (Z_WAITOK may block). If another thread races us and installs
	 * its own array first, ours is freed at the end.
	 */
	if (task->itk_dyld_notify == NULL) {
		notifiers_ptr = kalloc_type(ipc_port_t,
		    DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	}

	/* Lock order: g_dyldinfo_mtx before itk_lock. */
	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		task->itk_dyld_notify = notifiers_ptr;
		notifiers_ptr = NULL; /* ownership transferred to the task */
	}

	assert(task->itk_dyld_notify != NULL);
	/* First pass: clear dead names and check for duplicate registration */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
			/* dead right: queue it for release outside the locks */
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		} else if (*portp == sright) {
			/* the port is already registered and is active */
			right_exists = true;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	if (right_exists) {
		/* skip second pass */
		kr = KERN_RIGHT_EXISTS;
		goto out;
	}

	/* Second pass: register the port */
	kr = KERN_NO_SPACE;
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == IPC_PORT_NULL) {
			*portp = sright; /* consumes sright on KERN_SUCCESS */
			active_count++;
			kr = KERN_SUCCESS;
			break;
		}
	}

out:
	assert(active_count > 0);

	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Free our preallocated array if it was not installed (NULL-safe). */
	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);

	return kr;
}
5986 
5987 /*
5988  * Routine: task_dyld_process_info_notify_deregister
5989  *
5990  * Remove a send right in target task's itk_dyld_notify array matching the receive
5991  * right name passed in. Deallocate kernel memory for the array if it's the last port to
5992  * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
5993  *
5994  * Does not consume any reference.
5995  *
5996  * Args:
5997  *     task: Target task for the deregistration.
5998  *     rcv_name: The name denoting the receive right in caller's space.
5999  *
6000  * Returns:
6001  *     KERN_SUCCESS: A matching entry found and degistration succeeded.
6002  *     KERN_INVALID_TASK: task is invalid.
6003  *     KERN_INVALID_NAME: name is invalid.
6004  *     KERN_DENIED: Security policy denied this call.
6005  *     KERN_FAILURE: A matching entry is not found.
6006  *     KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6007  *
6008  *     Other error code see task_info().
6009  *
6010  * See Also:
6011  *     task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6012  */
kern_return_t
task_dyld_process_info_notify_deregister(
	task_t                  task,
	mach_port_name_t        rcv_name)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	boolean_t port_found = false;
	mach_vm_address_t ports_addr; /* a user space address */
	ipc_port_t sright;
	kern_return_t kr;
	ipc_port_t *portp;

	/* The kernel task has no dyld notifier array. */
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!MACH_PORT_VALID(rcv_name)) {
		return KERN_INVALID_NAME;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/*
	 * Locate the notifyMachPorts field in the target's
	 * dyld_all_image_infos; layout depends on the 32/64-bit format.
	 */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/* Resolve the caller's receive right name to its kernel port. */
	kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
	if (kr) {
		return KERN_INVALID_RIGHT;
	}

	/* Take our own reference, then drop the port lock that
	 * ipc_port_translate_receive returned holding. */
	ip_reference(sright);
	ip_mq_unlock(sright);

	assert(sright != IPC_PORT_NULL);

	/* Lock order: g_dyldinfo_mtx before itk_lock. */
	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		/* nothing registered for this task */
		itk_unlock(task);
		lck_mtx_unlock(&g_dyldinfo_mtx);
		ip_release(sright);
		return KERN_FAILURE;
	}

	/* Remove the matching entry; also scavenge any dead rights. */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == sright) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
			port_found = true;
		} else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Drop the reference taken after translation above. */
	ip_release(sright);

	return port_found ? KERN_SUCCESS : KERN_FAILURE;
}
6099 
6100 /*
6101  *	task_power_info
6102  *
6103  *	Returns power stats for the task.
6104  *	Note: Called with task locked.
6105  */
void
task_power_info_locked(
	task_t                  task,
	task_power_info_t       info,
	gpu_energy_data_t       ginfo,
	task_power_info_v2_t    infov2,
	uint64_t                *runnable_time)
{
	thread_t                thread;
	ledger_amount_t         tmp;    /* discarded debit side of ledger entries */

	uint64_t                runnable_time_sum = 0;

	task_lock_assert_owned(task);

	/* Wakeup counters come from the task ledgers (credit side only). */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	/* Start from the task-level totals (accumulated from dead threads),
	 * then add the live threads' contributions below. */
	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	info->total_user = task->total_user_time;
	info->total_system = task->total_system_time;
	runnable_time_sum = task->total_runnable_time;

#if defined(__arm__) || defined(__arm64__)
	if (infov2) {
		infov2->task_energy = task->task_energy;
	}
#endif /* defined(__arm__) || defined(__arm64__) */

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		infov2->task_ptime = task->total_ptime;
		infov2->task_pset_switches = task->ps_switch;
	}

	/* Fold in the stats of each live (non-idle) thread. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		uint64_t        tval;
		spl_t           x;

		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		/* Thread lock must be taken at splsched. */
		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

#if defined(__arm__) || defined(__arm64__)
		if (infov2) {
			infov2->task_energy += ml_energy_stat(thread);
		}
#endif /* defined(__arm__) || defined(__arm64__) */

		tval = timer_grab(&thread->user_timer);
		info->total_user += tval;

		if (infov2) {
			tval = timer_grab(&thread->ptime);
			infov2->task_ptime += tval;
			infov2->task_pset_switches += thread->ps_switch;
		}

		tval = timer_grab(&thread->system_timer);
		if (thread->precise_user_kernel_time) {
			info->total_system += tval;
		} else {
			/* system_timer may represent either sys or user */
			info->total_user += tval;
		}

		tval = timer_grab(&thread->runnable_timer);

		runnable_time_sum += tval;

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}

	if (runnable_time) {
		*runnable_time = runnable_time_sum;
	}
}
6200 
6201 /*
6202  *	task_gpu_utilisation
6203  *
6204  *	Returns the total gpu time used by the all the threads of the task
6205  *  (both dead and alive)
6206  */
6207 uint64_t
task_gpu_utilisation(task_t task)6208 task_gpu_utilisation(
6209 	task_t  task)
6210 {
6211 	uint64_t gpu_time = 0;
6212 #if defined(__x86_64__)
6213 	thread_t thread;
6214 
6215 	task_lock(task);
6216 	gpu_time += task->task_gpu_ns;
6217 
6218 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6219 		spl_t x;
6220 		x = splsched();
6221 		thread_lock(thread);
6222 		gpu_time += ml_gpu_stat(thread);
6223 		thread_unlock(thread);
6224 		splx(x);
6225 	}
6226 
6227 	task_unlock(task);
6228 #else /* defined(__x86_64__) */
6229 	/* silence compiler warning */
6230 	(void)task;
6231 #endif /* defined(__x86_64__) */
6232 	return gpu_time;
6233 }
6234 
6235 /*
6236  *	task_energy
6237  *
6238  *	Returns the total energy used by the all the threads of the task
6239  *  (both dead and alive)
6240  */
6241 uint64_t
task_energy(task_t task)6242 task_energy(
6243 	task_t  task)
6244 {
6245 	uint64_t energy = 0;
6246 	thread_t thread;
6247 
6248 	task_lock(task);
6249 	energy += task->task_energy;
6250 
6251 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6252 		spl_t x;
6253 		x = splsched();
6254 		thread_lock(thread);
6255 		energy += ml_energy_stat(thread);
6256 		thread_unlock(thread);
6257 		splx(x);
6258 	}
6259 
6260 	task_unlock(task);
6261 	return energy;
6262 }
6263 
6264 #if __AMP__
6265 
6266 uint64_t
task_cpu_ptime(task_t task)6267 task_cpu_ptime(
6268 	task_t  task)
6269 {
6270 	uint64_t cpu_ptime = 0;
6271 	thread_t thread;
6272 
6273 	task_lock(task);
6274 	cpu_ptime += task->total_ptime;
6275 
6276 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6277 		if (thread->options & TH_OPT_IDLE_THREAD) {
6278 			continue;
6279 		}
6280 		cpu_ptime += timer_grab(&thread->ptime);
6281 	}
6282 
6283 	task_unlock(task);
6284 	return cpu_ptime;
6285 }
6286 
6287 #else /* __AMP__ */
6288 
6289 uint64_t
task_cpu_ptime(__unused task_t task)6290 task_cpu_ptime(
6291 	__unused task_t  task)
6292 {
6293 	return 0;
6294 }
6295 
6296 #endif /* __AMP__ */
6297 
6298 /* This function updates the cpu time in the arrays for each
6299  * effective and requested QoS class
6300  */
void
task_update_cpu_time_qos_stats(
	task_t  task,
	uint64_t *eqos_stats,
	uint64_t *rqos_stats)
{
	/* Nothing to report into; avoid taking the task lock for no work. */
	if (!eqos_stats && !rqos_stats) {
		return;
	}

	task_lock(task);
	thread_t thread;
	/* Flush each live thread's pending QoS CPU time into the task-level
	 * accumulators before reading them below. Idle threads are skipped. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		thread_update_qos_cpu_time(thread);
	}

	/* Accumulate (not overwrite) effective-QoS buckets into the caller's array. */
	if (eqos_stats) {
		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
	}

	/* Same for requested-QoS buckets. */
	if (rqos_stats) {
		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
	}

	task_unlock(task);
}
6343 
6344 kern_return_t
task_purgable_info(task_t task,task_purgable_info_t * stats)6345 task_purgable_info(
6346 	task_t                  task,
6347 	task_purgable_info_t    *stats)
6348 {
6349 	if (task == TASK_NULL || stats == NULL) {
6350 		return KERN_INVALID_ARGUMENT;
6351 	}
6352 	/* Take task reference */
6353 	task_reference(task);
6354 	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6355 	/* Drop task reference */
6356 	task_deallocate(task);
6357 	return KERN_SUCCESS;
6358 }
6359 
void
task_vtimer_set(
	task_t          task,
	integer_t       which)
{
	thread_t        thread;
	spl_t           x;

	task_lock(task);

	/* Mark the vtimer as armed for this task. */
	task->vtimers |= which;

	/* Snapshot each thread's current timer values so later
	 * task_vtimer_update() calls measure deltas from this point. */
	switch (which) {
	case TASK_VTIMER_USER:
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			/* When user/kernel time is not tracked precisely,
			 * system_timer holds the combined time instead. */
			if (thread->precise_user_kernel_time) {
				thread->vtimer_user_save = timer_grab(&thread->user_timer);
			} else {
				thread->vtimer_user_save = timer_grab(&thread->system_timer);
			}
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_PROF:
		/* Profiling timer baselines against user + system time. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_prof_save = timer_grab(&thread->user_timer);
			thread->vtimer_prof_save += timer_grab(&thread->system_timer);
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_RLIM:
		/* CPU rlimit timer also baselines against user + system time. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
			thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
			thread_unlock(thread);
			splx(x);
		}
		break;
	}

	task_unlock(task);
}
6412 
6413 void
task_vtimer_clear(task_t task,integer_t which)6414 task_vtimer_clear(
6415 	task_t          task,
6416 	integer_t       which)
6417 {
6418 	assert(task == current_task());
6419 
6420 	task_lock(task);
6421 
6422 	task->vtimers &= ~which;
6423 
6424 	task_unlock(task);
6425 }
6426 
void
task_vtimer_update(
	__unused
	task_t          task,
	integer_t       which,
	uint32_t        *microsecs)
{
	thread_t        thread = current_thread();
	uint32_t        tdelt = 0;
	clock_sec_t     secs = 0;       /* whole seconds are discarded */
	uint64_t        tsum;

	/* Only the current task's current thread is examined. */
	assert(task == current_task());

	spl_t s = splsched();
	thread_lock(thread);

	/* Bail out if any requested vtimer bit is not armed. */
	if ((task->vtimers & which) != (uint32_t)which) {
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {
	case TASK_VTIMER_USER:
		/* Delta since the snapshot taken by task_vtimer_set().
		 * timer_delta also advances the saved value. */
		if (thread->precise_user_kernel_time) {
			tdelt = (uint32_t)timer_delta(&thread->user_timer,
			    &thread->vtimer_user_save);
		} else {
			/* system_timer holds combined time in imprecise mode */
			tdelt = (uint32_t)timer_delta(&thread->system_timer,
			    &thread->vtimer_user_save);
		}
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = timer_grab(&thread->user_timer);
		tsum += timer_grab(&thread->system_timer);
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0) {
			thread->vtimer_prof_save = tsum;
		}
		break;

	case TASK_VTIMER_RLIM:
		tsum = timer_grab(&thread->user_timer);
		tsum += timer_grab(&thread->system_timer);
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		/* unlike PROF, the baseline always advances here */
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}
6485 
6486 /*
6487  *	task_assign:
6488  *
6489  *	Change the assigned processor set for the task
6490  */
kern_return_t
task_assign(
	__unused task_t         task,
	__unused processor_set_t        new_pset,
	__unused boolean_t      assign_threads)
{
	/* Processor set reassignment is not supported; always fails. */
	return KERN_FAILURE;
}
6499 
6500 /*
6501  *	task_assign_default:
6502  *
6503  *	Version of task_assign to assign to default processor set.
6504  */
kern_return_t
task_assign_default(
	task_t          task,
	boolean_t       assign_threads)
{
	/* Delegates to task_assign() with the default pset (always fails). */
	return task_assign(task, &pset0, assign_threads);
}
6512 
6513 /*
6514  *	task_get_assignment
6515  *
6516  *	Return name of processor set that task is assigned to.
6517  */
6518 kern_return_t
task_get_assignment(task_t task,processor_set_t * pset)6519 task_get_assignment(
6520 	task_t          task,
6521 	processor_set_t *pset)
6522 {
6523 	if (!task || !task->active) {
6524 		return KERN_FAILURE;
6525 	}
6526 
6527 	*pset = &pset0;
6528 
6529 	return KERN_SUCCESS;
6530 }
6531 
uint64_t
get_task_dispatchqueue_offset(
	task_t          task)
{
	/* Return the task's recorded libdispatch queue offset. */
	return task->dispatchqueue_offset;
}
6538 
6539 /*
6540  *      task_policy
6541  *
6542  *	Set scheduling policy and parameters, both base and limit, for
6543  *	the given task. Policy must be a policy which is enabled for the
6544  *	processor set. Change contained threads if requested.
6545  */
kern_return_t
task_policy(
	__unused task_t                 task,
	__unused policy_t                       policy_id,
	__unused policy_base_t          base,
	__unused mach_msg_type_number_t count,
	__unused boolean_t                      set_limit,
	__unused boolean_t                      change)
{
	/* Legacy scheduling-policy interface; not supported, always fails. */
	return KERN_FAILURE;
}
6557 
6558 /*
6559  *	task_set_policy
6560  *
6561  *	Set scheduling policy and parameters, both base and limit, for
6562  *	the given task. Policy can be any policy implemented by the
6563  *	processor set, whether enabled or not. Change contained threads
6564  *	if requested.
6565  */
kern_return_t
task_set_policy(
	__unused task_t                 task,
	__unused processor_set_t                pset,
	__unused policy_t                       policy_id,
	__unused policy_base_t          base,
	__unused mach_msg_type_number_t base_count,
	__unused policy_limit_t         limit,
	__unused mach_msg_type_number_t limit_count,
	__unused boolean_t                      change)
{
	/* Legacy scheduling-policy interface; not supported, always fails. */
	return KERN_FAILURE;
}
6579 
kern_return_t
task_set_ras_pc(
	__unused task_t task,
	__unused vm_offset_t    pc,
	__unused vm_offset_t    endpc)
{
	/* Restartable atomic sequences are not supported; always fails. */
	return KERN_FAILURE;
}
6588 
/*
 * Tear down all task-owned synchronizers at task termination.
 */
void
task_synchronizer_destroy_all(task_t task)
{
	/*
	 *  Destroy owned semaphores
	 */
	semaphore_destroy_all(task);
}
6597 
6598 /*
6599  * Install default (machine-dependent) initial thread state
6600  * on the task.  Subsequent thread creation will have this initial
6601  * state set on the thread by machine_thread_inherit_taskwide().
6602  * Flavors and structures are exactly the same as those to thread_set_state()
6603  */
kern_return_t
task_set_state(
	task_t task,
	int flavor,
	thread_state_t state,
	mach_msg_type_number_t state_count)
{
	kern_return_t ret;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	if (!task->active) {
		/* Refuse to set default state on a terminating task. */
		task_unlock(task);
		return KERN_FAILURE;
	}

	/* Machine layer validates the flavor and stores the template state. */
	ret = machine_task_set_state(task, flavor, state, state_count);

	task_unlock(task);
	return ret;
}
6629 
6630 /*
6631  * Examine the default (machine-dependent) initial thread state
6632  * on the task, as set by task_set_state().  Flavors and structures
6633  * are exactly the same as those passed to thread_get_state().
6634  */
kern_return_t
task_get_state(
	task_t  task,
	int     flavor,
	thread_state_t state,
	mach_msg_type_number_t *state_count)
{
	kern_return_t ret;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	if (!task->active) {
		/* No default state to report for a terminating task. */
		task_unlock(task);
		return KERN_FAILURE;
	}

	/* Machine layer validates the flavor and copies out the state. */
	ret = machine_task_get_state(task, flavor, state, state_count);

	task_unlock(task);
	return ret;
}
6660 
6661 
/*
 * Enqueue an EXC_GUARD exception (with a corpse) against the current task
 * for a guard violation described by code/subcode/reason.
 */
static kern_return_t __attribute__((noinline, not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid()) {
		return KERN_NOT_SUPPORTED;              // initproc is immune
	}
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;

	/* (See jetsam-related comments below) */

	/* Shield the task from jetsam while the corpse is generated. */
	proc_memstat_skip(task->bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
	proc_memstat_skip(task->bsd_info, FALSE);
	return kr;
}
6687 
/*
 * Public entry point for reporting a guard violation on the current task.
 */
kern_return_t
task_violated_guard(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason)
{
	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
}
6696 
6697 
6698 #if CONFIG_MEMORYSTATUS
6699 
6700 boolean_t
task_get_memlimit_is_active(task_t task)6701 task_get_memlimit_is_active(task_t task)
6702 {
6703 	assert(task != NULL);
6704 
6705 	if (task->memlimit_is_active == 1) {
6706 		return TRUE;
6707 	} else {
6708 		return FALSE;
6709 	}
6710 }
6711 
6712 void
task_set_memlimit_is_active(task_t task,boolean_t memlimit_is_active)6713 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6714 {
6715 	assert(task != NULL);
6716 
6717 	if (memlimit_is_active) {
6718 		task->memlimit_is_active = 1;
6719 	} else {
6720 		task->memlimit_is_active = 0;
6721 	}
6722 }
6723 
6724 boolean_t
task_get_memlimit_is_fatal(task_t task)6725 task_get_memlimit_is_fatal(task_t task)
6726 {
6727 	assert(task != NULL);
6728 
6729 	if (task->memlimit_is_fatal == 1) {
6730 		return TRUE;
6731 	} else {
6732 		return FALSE;
6733 	}
6734 }
6735 
6736 void
task_set_memlimit_is_fatal(task_t task,boolean_t memlimit_is_fatal)6737 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6738 {
6739 	assert(task != NULL);
6740 
6741 	if (memlimit_is_fatal) {
6742 		task->memlimit_is_fatal = 1;
6743 	} else {
6744 		task->memlimit_is_fatal = 0;
6745 	}
6746 }
6747 
uint64_t
task_get_dirty_start(task_t task)
{
	/* Return the recorded memorystatus dirty-tracking start timestamp. */
	return task->memstat_dirty_start;
}
6753 
void
task_set_dirty_start(task_t task, uint64_t start)
{
	/* Record the memorystatus dirty-tracking start timestamp under the task lock. */
	task_lock(task);
	task->memstat_dirty_start = start;
	task_unlock(task);
}
6761 
6762 boolean_t
task_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6763 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6764 {
6765 	boolean_t triggered = FALSE;
6766 
6767 	assert(task == current_task());
6768 
6769 	/*
6770 	 * Returns true, if task has already triggered an exc_resource exception.
6771 	 */
6772 
6773 	if (memlimit_is_active) {
6774 		triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6775 	} else {
6776 		triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6777 	}
6778 
6779 	return triggered;
6780 }
6781 
void
task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
{
	assert(task == current_task());

	/*
	 * We allow one exc_resource per process per active/inactive limit.
	 * The limit's fatal attribute does not come into play.
	 */

	if (memlimit_is_active) {
		task->memlimit_active_exc_resource = 1;
	} else {
		task->memlimit_inactive_exc_resource = 1;
	}
}
6798 
6799 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6800 
/*
 * Deliver an EXC_RESOURCE (RESOURCE_TYPE_MEMORY / FLAVOR_HIGH_WATERMARK)
 * exception for the current task, which has crossed its memory high
 * watermark of max_footprint_mb.  Optionally takes a user core dump
 * first (hwm_user_cores boot-arg).  Delivery is either synchronous
 * (suspend, triage, resume) or via a corpse fork, depending on the
 * is_fatal flag, per-process settings, and boot-args.
 */
void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
{
	task_t                                          task            = current_task();
	int                                                     pid         = 0;
	const char                                      *procname       = "unknown";
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
	boolean_t send_sync_exc_resource = FALSE;

#ifdef MACH_BSD
	pid = proc_selfpid();

	if (pid == 1) {
		/*
		 * Cannot have ReportCrash analyzing
		 * a suspended initproc.
		 */
		return;
	}

	if (task->bsd_info != NULL) {
		procname = proc_name_address(current_task()->bsd_info);
		send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
	}
#endif
#if CONFIG_COREDUMP
	if (hwm_user_cores) {
		int                             error;
		uint64_t                starttime, end;
		clock_sec_t             secs = 0;
		uint32_t                microsecs = 0;

		starttime = mach_absolute_time();
		/*
		 * Trigger a coredump of this process. Don't proceed unless we know we won't
		 * be filling up the disk; and ignore the core size resource limit for this
		 * core file.
		 */
		if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
		}
		/*
		 * coredump() leaves the task suspended.
		 */
		task_resume_internal(current_task());

		end = mach_absolute_time();
		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
		    proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
	}
#endif /* CONFIG_COREDUMP */

	if (disable_exc_resource) {
		/* Boot-arg override: log and skip exception delivery entirely. */
		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
		    "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
		return;
	}

	/*
	 * A task that has triggered an EXC_RESOURCE, should not be
	 * jetsammed when the device is under memory pressure.  Here
	 * we set the P_MEMSTAT_SKIP flag so that the process
	 * will be skipped if the memorystatus_thread wakes up.
	 *
	 * This is a debugging aid to ensure we can get a corpse before
	 * the jetsam thread kills the process.
	 * Note that proc_memstat_skip is a no-op on release kernels.
	 */
	proc_memstat_skip(current_task()->bsd_info, TRUE);

	/* Build the EXC_RESOURCE code words: type, flavor, and the limit in MB. */
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);

	/*
	 * Do not generate a corpse fork if the violation is a fatal one
	 * or the process wants synchronous EXC_RESOURCE exceptions.
	 */
	if (is_fatal || send_sync_exc_resource || !exc_via_corpse_forking) {
		/* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
		if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
			/*
			 * Use the _internal_ variant so that no user-space
			 * process can resume our task from under us.
			 */
			task_suspend_internal(task);
			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
			task_resume_internal(task);
		}
	} else {
		if (audio_active) {
			/* Avoid audio glitches: don't fork a corpse during playback. */
			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			    "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
		} else {
			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
			    code, EXCEPTION_CODE_MAX, NULL);
		}
	}

	/*
	 * After the EXC_RESOURCE has been handled, we must clear the
	 * P_MEMSTAT_SKIP flag so that the process can again be
	 * considered for jetsam if the memorystatus_thread wakes up.
	 */
	proc_memstat_skip(current_task()->bsd_info, FALSE);         /* clear the flag */
}
6909 
6910 /*
6911  * Callback invoked when a task exceeds its physical footprint limit.
6912  */
void
task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	/*
	 * Ledger callback: classify the event (warning vs. true violation),
	 * raise EXC_RESOURCE at most once per limit type, then notify
	 * memorystatus.
	 */
	ledger_amount_t max_footprint, max_footprint_mb;
	task_t task;
	boolean_t is_warning;
	boolean_t memlimit_is_active;
	boolean_t memlimit_is_fatal;

	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
		/*
		 * Task memory limits only provide a warning on the way up.
		 */
		return;
	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
		/*
		 * This task is in danger of violating a memory limit,
		 * It has exceeded a percentage level of the limit.
		 */
		is_warning = TRUE;
	} else {
		/*
		 * The task has exceeded the physical footprint limit.
		 * This is not a warning but a true limit violation.
		 */
		is_warning = FALSE;
	}

	task = current_task();

	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
	max_footprint_mb = max_footprint >> 20;

	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task);

	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * We only generate the exception once per process per memlimit (active/inactive limit).
	 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
	 * and we disable it by marking that memlimit as exception triggered.
	 */
	if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
		memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
	}

	memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
}
6963 
6964 extern int proc_check_footprint_priv(void);
6965 
/*
 * Privileged wrapper for setting a task's physical footprint limit.
 * Preserves the task's current active/fatal memlimit attributes.
 */
kern_return_t
task_set_phys_footprint_limit(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb)
{
	kern_return_t error;

	boolean_t memlimit_is_active;
	boolean_t memlimit_is_fatal;

	/* Caller must hold the footprint-modification privilege. */
	if ((error = proc_check_footprint_priv())) {
		return KERN_NO_ACCESS;
	}

	/*
	 * This call should probably be obsoleted.
	 * But for now, we default to current state.
	 */
	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task);

	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
}
6990 
6991 kern_return_t
task_convert_phys_footprint_limit(int limit_mb,int * converted_limit_mb)6992 task_convert_phys_footprint_limit(
6993 	int limit_mb,
6994 	int *converted_limit_mb)
6995 {
6996 	if (limit_mb == -1) {
6997 		/*
6998 		 * No limit
6999 		 */
7000 		if (max_task_footprint != 0) {
7001 			*converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);         /* bytes to MB */
7002 		} else {
7003 			*converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
7004 		}
7005 	} else {
7006 		/* nothing to convert */
7007 		*converted_limit_mb = limit_mb;
7008 	}
7009 	return KERN_SUCCESS;
7010 }
7011 
7012 
/*
 * Set (or remove, with new_limit_mb == -1) a task's physical footprint
 * ledger limit, and record the limit's active/fatal attributes.
 * Optionally reports the previous limit through old_limit_mb.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal)
{
	ledger_amount_t old;
	kern_return_t ret;

	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

	if (old_limit_mb) {
		*old_limit_mb = (int)(old >> 20);
	}

	if (new_limit_mb == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
		    max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);

		task_lock(task);
		task_set_memlimit_is_active(task, memlimit_is_active);
		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
		task_unlock(task);

		return KERN_SUCCESS;
	}

#ifdef CONFIG_NOMONITORS
	/* Monitors compiled out: report success without arming a limit. */
	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

	task_lock(task);

	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
		/*
		 * memlimit state is not changing
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	task_set_memlimit_is_active(task, memlimit_is_active);
	task_set_memlimit_is_fatal(task, memlimit_is_fatal);

	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

	/* Re-check immediately in case the new limit is already exceeded. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);

	return KERN_SUCCESS;
}
7088 
/*
 * Report the task's current physical footprint ledger limit, in MB.
 */
kern_return_t
task_get_phys_footprint_limit(
	task_t task,
	int *limit_mb)
{
	ledger_amount_t limit;
	kern_return_t ret;

	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
	*limit_mb = (int)(limit >> 20);

	return KERN_SUCCESS;
}
7112 #else /* CONFIG_MEMORYSTATUS */
kern_return_t
task_set_phys_footprint_limit(
	__unused task_t task,
	__unused int new_limit_mb,
	__unused int *old_limit_mb)
{
	/* Footprint limits require CONFIG_MEMORYSTATUS; unsupported here. */
	return KERN_FAILURE;
}
7121 
kern_return_t
task_get_phys_footprint_limit(
	__unused task_t task,
	__unused int *limit_mb)
{
	/* Footprint limits require CONFIG_MEMORYSTATUS; unsupported here. */
	return KERN_FAILURE;
}
7129 #endif /* CONFIG_MEMORYSTATUS */
7130 
security_token_t *
task_get_sec_token(task_t task)
{
	/* Security token lives in the task's read-only data. */
	return &task_get_ro(task)->task_tokens.sec_token;
}
7136 
void
task_set_sec_token(task_t task, security_token_t *token)
{
	/* Read-only task data must be updated through the zalloc RO path. */
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.sec_token, token);
}
7143 
audit_token_t *
task_get_audit_token(task_t task)
{
	/* Audit token lives in the task's read-only data. */
	return &task_get_ro(task)->task_tokens.audit_token;
}
7149 
void
task_set_audit_token(task_t task, audit_token_t *token)
{
	/* Read-only task data must be updated through the zalloc RO path. */
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.audit_token, token);
}
7156 
/*
 * Set both security and audit tokens with a single read-only-zone update.
 */
void
task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
{
	struct task_token_ro_data tokens;

	/* Copy-modify-write: RO data can only be replaced wholesale. */
	tokens = task_get_ro(task)->task_tokens;
	tokens.sec_token = *sec_token;
	tokens.audit_token = *audit_token;

	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
	    &tokens);
}
7169 
boolean_t
task_is_privileged(task_t task)
{
	/* Privileged means the security token's first word (uid) is 0. */
	return task_get_sec_token(task)->val[0] == 0;
}
7175 
7176 #ifdef CONFIG_MACF
uint8_t *
task_get_mach_trap_filter_mask(task_t task)
{
	/* MACF mach-trap filter bitmap, stored in read-only task data. */
	return task_get_ro(task)->task_filters.mach_trap_filter_mask;
}
7182 
void
task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
{
	/* Read-only task data must be updated through the zalloc RO path. */
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_trap_filter_mask, &mask);
}
7189 
uint8_t *
task_get_mach_kobj_filter_mask(task_t task)
{
	/* MACF kobject filter bitmap, stored in read-only task data. */
	return task_get_ro(task)->task_filters.mach_kobj_filter_mask;
}
7195 
void
task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
{
	/* Read-only task data must be updated through the zalloc RO path. */
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_kobj_filter_mask, &mask);
}
7202 
/*
 * Copy both MACF filter-mask pointers from old_task to new_task with a
 * single read-only-zone update.
 */
void
task_copy_filter_masks(task_t new_task, task_t old_task)
{
	struct task_filter_ro_data filters;

	/* Copy-modify-write: RO data can only be replaced wholesale. */
	filters = task_get_ro(new_task)->task_filters;
	filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(old_task);
	filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(old_task);

	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(new_task),
	    task_filters, &filters);
}
7215 #endif /* CONFIG_MACF */
7216 
/*
 * Set a cap on the number of threads the task may create.
 * Requests above TASK_MAX_THREAD_LIMIT are silently ignored.
 */
void
task_set_thread_limit(task_t task, uint16_t thread_limit)
{
	assert(task != kernel_task);
	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
		task_lock(task);
		task->task_thread_limit = thread_limit;
		task_unlock(task);
	}
}
7227 
7228 #if CONFIG_PROC_RESOURCE_LIMITS
kern_return_t
task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
{
	/* Forward the soft/hard port-table size limits to the task's IPC space. */
	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
}
7234 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7235 
7236 #if XNU_TARGET_OS_OSX
boolean_t
task_has_system_version_compat_enabled(task_t task)
{
	/* Test the TF_SYS_VERSION_COMPAT flag under the task lock. */
	boolean_t enabled = FALSE;

	task_lock(task);
	enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
	task_unlock(task);

	return enabled;
}
7248 
/*
 * Set or clear the TF_SYS_VERSION_COMPAT flag on the calling task.
 */
void
task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
{
	assert(task == current_task());
	assert(task != kernel_task);

	task_lock(task);
	if (enable_system_version_compat) {
		task->t_flags |= TF_SYS_VERSION_COMPAT;
	} else {
		task->t_flags &= ~TF_SYS_VERSION_COMPAT;
	}
	task_unlock(task);
}
7263 #endif /* XNU_TARGET_OS_OSX */
7264 
7265 /*
7266  * We need to export some functions to other components that
7267  * are currently implemented in macros within the osfmk
7268  * component.  Just export them as functions of the same name.
7269  */
7270 boolean_t
is_kerneltask(task_t t)7271 is_kerneltask(task_t t)
7272 {
7273 	if (t == kernel_task) {
7274 		return TRUE;
7275 	}
7276 
7277 	return FALSE;
7278 }
7279 
boolean_t
is_corpsetask(task_t t)
{
	/* Exported wrapper around the corpse predicate for other components. */
	return task_is_a_corpse(t);
}
7285 
boolean_t
is_corpsefork(task_t t)
{
	/* Exported wrapper around the corpse-fork predicate for other components. */
	return task_is_a_corpse_fork(t);
}
7291 
task_t
current_task_early(void)
{
	/*
	 * Variant of current_task() that tolerates very early boot, before
	 * the current thread has its read-only data (t_tro) attached;
	 * returns TASK_NULL in that window instead of dereferencing it.
	 */
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		if (current_thread()->t_tro == NULL) {
			return TASK_NULL;
		}
	}
	return get_threadtask(current_thread());
}
7302 
task_t
current_task(void)
{
	/* The task owning the currently executing thread. */
	return get_threadtask(current_thread());
}
7308 
7309 /* defined in bsd/kern/kern_prot.c */
7310 extern int get_audit_token_pid(audit_token_t *audit_token);
7311 
7312 int
task_pid(task_t task)7313 task_pid(task_t task)
7314 {
7315 	if (task) {
7316 		return get_audit_token_pid(task_get_audit_token(task));
7317 	}
7318 	return -1;
7319 }
7320 
7321 #if __has_feature(ptrauth_calls)
7322 /*
7323  * Get the shared region id and jop signing key for the task.
7324  * The function will allocate a kalloc buffer and return
7325  * it to caller, the caller needs to free it. This is used
7326  * for getting the information via task port.
7327  */
char *
task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
{
	size_t len;
	char *shared_region_id = NULL;

	task_lock(task);
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		return NULL;
	}
	len = strlen(task->shared_region_id) + 1;

	/* don't hold task lock while allocating */
	task_unlock(task);
	shared_region_id = kalloc_data(len, Z_WAITOK);
	task_lock(task);

	/* Re-check: the id may have been cleared while the lock was dropped. */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		kfree_data(shared_region_id, len);
		return NULL;
	}
	assert(len == strlen(task->shared_region_id) + 1);         /* should never change */
	strlcpy(shared_region_id, task->shared_region_id, len);
	task_unlock(task);

	/* find key from its auth pager */
	if (jop_pid != NULL) {
		*jop_pid = shared_region_find_key(shared_region_id);
	}

	return shared_region_id;
}
7362 
7363 /*
7364  * set the shared region id for a task
7365  */
void
task_set_shared_region_id(task_t task, char *id)
{
	/*
	 * Install the new id under the task lock (ownership of `id`
	 * transfers to the task), then release any previous id outside
	 * the lock.  Also resets the auth-remapped flag.
	 */
	char *old_id;

	task_lock(task);
	old_id = task->shared_region_id;
	task->shared_region_id = id;
	task->shared_region_auth_remapped = FALSE;
	task_unlock(task);

	/* free any pre-existing shared region id */
	if (old_id != NULL) {
		shared_region_key_dealloc(old_id);
		kfree_data(old_id, strlen(old_id) + 1);
	}
}
7383 #endif /* __has_feature(ptrauth_calls) */
7384 
7385 /*
7386  * This routine finds a thread in a task by its unique id
7387  * Returns a referenced thread or THREAD_NULL if the thread was not found
7388  *
7389  * TODO: This is super inefficient - it's an O(threads in task) list walk!
7390  *       We should make a tid hash, or transition all tid clients to thread ports
7391  *
7392  * Precondition: No locks held (will take task lock)
7393  */
thread_t
task_findtid(task_t task, uint64_t tid)
{
	thread_t self           = current_thread();
	thread_t found_thread   = THREAD_NULL;
	thread_t iter_thread    = THREAD_NULL;

	/* Short-circuit the lookup if we're looking up ourselves */
	if (tid == self->thread_id || tid == TID_NULL) {
		assert(get_threadtask(self) == task);

		/* Caller receives a reference it must drop. */
		thread_reference(self);

		return self;
	}

	task_lock(task);

	/* Linear scan of the task's thread list (see O(n) note above). */
	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
		if (iter_thread->thread_id == tid) {
			found_thread = iter_thread;
			thread_reference(found_thread);
			break;
		}
	}

	task_unlock(task);

	return found_thread;
}
7424 
7425 int
pid_from_task(task_t task)7426 pid_from_task(task_t task)
7427 {
7428 	int pid = -1;
7429 
7430 	if (task->bsd_info) {
7431 		pid = proc_pid(task->bsd_info);
7432 	} else {
7433 		pid = task_pid(task);
7434 	}
7435 
7436 	return pid;
7437 }
7438 
7439 /*
7440  * Control the CPU usage monitor for a task.
7441  */
7442 kern_return_t
task_cpu_usage_monitor_ctl(task_t task,uint32_t * flags)7443 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7444 {
7445 	int error = KERN_SUCCESS;
7446 
7447 	if (*flags & CPUMON_MAKE_FATAL) {
7448 		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7449 	} else {
7450 		error = KERN_INVALID_ARGUMENT;
7451 	}
7452 
7453 	return error;
7454 }
7455 
7456 /*
7457  * Control the wakeups monitor for a task.
7458  */
7459 kern_return_t
task_wakeups_monitor_ctl(task_t task,uint32_t * flags,int32_t * rate_hz)7460 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
7461 {
7462 	ledger_t ledger = task->ledger;
7463 
7464 	task_lock(task);
7465 	if (*flags & WAKEMON_GET_PARAMS) {
7466 		ledger_amount_t limit;
7467 		uint64_t                period;
7468 
7469 		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
7470 		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
7471 
7472 		if (limit != LEDGER_LIMIT_INFINITY) {
7473 			/*
7474 			 * An active limit means the wakeups monitor is enabled.
7475 			 */
7476 			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
7477 			*flags = WAKEMON_ENABLE;
7478 			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
7479 				*flags |= WAKEMON_MAKE_FATAL;
7480 			}
7481 		} else {
7482 			*flags = WAKEMON_DISABLE;
7483 			*rate_hz = -1;
7484 		}
7485 
7486 		/*
7487 		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
7488 		 */
7489 		task_unlock(task);
7490 		return KERN_SUCCESS;
7491 	}
7492 
7493 	if (*flags & WAKEMON_ENABLE) {
7494 		if (*flags & WAKEMON_SET_DEFAULTS) {
7495 			*rate_hz = task_wakeups_monitor_rate;
7496 		}
7497 
7498 #ifndef CONFIG_NOMONITORS
7499 		if (*flags & WAKEMON_MAKE_FATAL) {
7500 			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7501 		}
7502 #endif /* CONFIG_NOMONITORS */
7503 
7504 		if (*rate_hz <= 0) {
7505 			task_unlock(task);
7506 			return KERN_INVALID_ARGUMENT;
7507 		}
7508 
7509 #ifndef CONFIG_NOMONITORS
7510 		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
7511 		    (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
7512 		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
7513 		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
7514 #endif /* CONFIG_NOMONITORS */
7515 	} else if (*flags & WAKEMON_DISABLE) {
7516 		/*
7517 		 * Caller wishes to disable wakeups monitor on the task.
7518 		 *
7519 		 * Disable telemetry if it was triggered by the wakeups monitor, and
7520 		 * remove the limit & callback on the wakeups ledger entry.
7521 		 */
7522 #if CONFIG_TELEMETRY
7523 		telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
7524 #endif
7525 		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
7526 		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
7527 	}
7528 
7529 	task_unlock(task);
7530 	return KERN_SUCCESS;
7531 }
7532 
7533 void
task_wakeups_rate_exceeded(int warning,__unused const void * param0,__unused const void * param1)7534 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
7535 {
7536 	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7537 #if CONFIG_TELEMETRY
7538 		/*
7539 		 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
7540 		 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
7541 		 */
7542 		telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
7543 #endif
7544 		return;
7545 	}
7546 
7547 #if CONFIG_TELEMETRY
7548 	/*
7549 	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
7550 	 * exceeded the limit, turn telemetry off for the task.
7551 	 */
7552 	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
7553 #endif
7554 
7555 	if (warning == 0) {
7556 		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
7557 	}
7558 }
7559 
7560 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)7561 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
7562 {
7563 	task_t                      task        = current_task();
7564 	int                         pid         = 0;
7565 	const char                  *procname   = "unknown";
7566 	boolean_t                   fatal;
7567 	kern_return_t               kr;
7568 #ifdef EXC_RESOURCE_MONITORS
7569 	mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
7570 #endif /* EXC_RESOURCE_MONITORS */
7571 	struct ledger_entry_info    lei;
7572 
7573 #ifdef MACH_BSD
7574 	pid = proc_selfpid();
7575 	if (task->bsd_info != NULL) {
7576 		procname = proc_name_address(current_task()->bsd_info);
7577 	}
7578 #endif
7579 
7580 	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
7581 
7582 	/*
7583 	 * Disable the exception notification so we don't overwhelm
7584 	 * the listener with an endless stream of redundant exceptions.
7585 	 * TODO: detect whether another thread is already reporting the violation.
7586 	 */
7587 	uint32_t flags = WAKEMON_DISABLE;
7588 	task_wakeups_monitor_ctl(task, &flags, NULL);
7589 
7590 	fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7591 	trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
7592 	os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
7593 	    "over ~%llu seconds, averaging %llu wakes / second and "
7594 	    "violating a %slimit of %llu wakes over %llu seconds.\n",
7595 	    procname, pid,
7596 	    lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
7597 	    lei.lei_last_refill == 0 ? 0 :
7598 	    (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
7599 	    fatal ? "FATAL " : "",
7600 	    lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
7601 
7602 	kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
7603 	    fatal ? kRNFatalLimitFlag : 0);
7604 	if (kr) {
7605 		printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
7606 	}
7607 
7608 #ifdef EXC_RESOURCE_MONITORS
7609 	if (disable_exc_resource) {
7610 		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7611 		    "supressed by a boot-arg\n", procname, pid);
7612 		return;
7613 	}
7614 	if (audio_active) {
7615 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7616 		    "supressed due to audio playback\n", procname, pid);
7617 		return;
7618 	}
7619 	if (lei.lei_last_refill == 0) {
7620 		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7621 		    "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
7622 	}
7623 
7624 	code[0] = code[1] = 0;
7625 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
7626 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
7627 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
7628 	    NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
7629 	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
7630 	    lei.lei_last_refill);
7631 	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
7632 	    NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
7633 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7634 #endif /* EXC_RESOURCE_MONITORS */
7635 
7636 	if (fatal) {
7637 		task_terminate_internal(task);
7638 	}
7639 }
7640 
7641 static boolean_t
global_update_logical_writes(int64_t io_delta,int64_t * global_write_count)7642 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
7643 {
7644 	int64_t old_count, new_count;
7645 	boolean_t needs_telemetry;
7646 
7647 	do {
7648 		new_count = old_count = *global_write_count;
7649 		new_count += io_delta;
7650 		if (new_count >= io_telemetry_limit) {
7651 			new_count = 0;
7652 			needs_telemetry = TRUE;
7653 		} else {
7654 			needs_telemetry = FALSE;
7655 		}
7656 	} while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
7657 	return needs_telemetry;
7658 }
7659 
7660 void
task_update_physical_writes(__unused task_t task,__unused task_physical_write_flavor_t flavor,__unused uint64_t io_size,__unused task_balance_flags_t flags)7661 task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
7662 {
7663 #if CONFIG_PHYS_WRITE_ACCT
7664 	if (!io_size) {
7665 		return;
7666 	}
7667 
7668 	/*
7669 	 * task == NULL means that we have to update kernel_task ledgers
7670 	 */
7671 	if (!task) {
7672 		task = kernel_task;
7673 	}
7674 
7675 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
7676 	    task_pid(task), flavor, io_size, flags, 0);
7677 	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);
7678 
7679 	if (flags & TASK_BALANCE_CREDIT) {
7680 		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7681 			OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7682 			ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7683 		}
7684 	} else if (flags & TASK_BALANCE_DEBIT) {
7685 		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7686 			OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7687 			ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7688 		}
7689 	}
7690 #endif /* CONFIG_PHYS_WRITE_ACCT */
7691 }
7692 
7693 void
task_update_logical_writes(task_t task,uint32_t io_size,int flags,void * vp)7694 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
7695 {
7696 	int64_t io_delta = 0;
7697 	int64_t * global_counter_to_update;
7698 	boolean_t needs_telemetry = FALSE;
7699 	boolean_t is_external_device = FALSE;
7700 	int ledger_to_update = 0;
7701 	struct task_writes_counters * writes_counters_to_update;
7702 
7703 	if ((!task) || (!io_size) || (!vp)) {
7704 		return;
7705 	}
7706 
7707 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
7708 	    task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
7709 	DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
7710 
7711 	// Is the drive backing this vnode internal or external to the system?
7712 	if (vnode_isonexternalstorage(vp) == false) {
7713 		global_counter_to_update = &global_logical_writes_count;
7714 		ledger_to_update = task_ledgers.logical_writes;
7715 		writes_counters_to_update = &task->task_writes_counters_internal;
7716 		is_external_device = FALSE;
7717 	} else {
7718 		global_counter_to_update = &global_logical_writes_to_external_count;
7719 		ledger_to_update = task_ledgers.logical_writes_to_external;
7720 		writes_counters_to_update = &task->task_writes_counters_external;
7721 		is_external_device = TRUE;
7722 	}
7723 
7724 	switch (flags) {
7725 	case TASK_WRITE_IMMEDIATE:
7726 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
7727 		ledger_credit(task->ledger, ledger_to_update, io_size);
7728 		if (!is_external_device) {
7729 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7730 		}
7731 		break;
7732 	case TASK_WRITE_DEFERRED:
7733 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
7734 		ledger_credit(task->ledger, ledger_to_update, io_size);
7735 		if (!is_external_device) {
7736 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7737 		}
7738 		break;
7739 	case TASK_WRITE_INVALIDATED:
7740 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
7741 		ledger_debit(task->ledger, ledger_to_update, io_size);
7742 		if (!is_external_device) {
7743 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
7744 		}
7745 		break;
7746 	case TASK_WRITE_METADATA:
7747 		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
7748 		ledger_credit(task->ledger, ledger_to_update, io_size);
7749 		if (!is_external_device) {
7750 			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7751 		}
7752 		break;
7753 	}
7754 
7755 	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
7756 	if (io_telemetry_limit != 0) {
7757 		/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
7758 		needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
7759 		if (needs_telemetry && !is_external_device) {
7760 			act_set_io_telemetry_ast(current_thread());
7761 		}
7762 	}
7763 }
7764 
/*
 * Control the I/O monitor for a task.
 *
 * IOMON_ENABLE arms the physical-writes ledger with the globally
 * configured limit (task_iomon_limit_mb) and period
 * (task_iomon_interval_secs); IOMON_DISABLE removes the refill limit and
 * callback. If both flags are set, IOMON_ENABLE takes precedence.
 * Always returns KERN_SUCCESS.
 */
kern_return_t
task_io_monitor_ctl(task_t task, uint32_t *flags)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & IOMON_ENABLE) {
		/* Configure the physical I/O ledger */
		ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
		ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
	} else if (*flags & IOMON_DISABLE) {
		/*
		 * Caller wishes to disable I/O monitor on the task.
		 */
		ledger_disable_refill(ledger, task_ledgers.physical_writes);
		ledger_disable_callback(ledger, task_ledgers.physical_writes);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
7789 
7790 void
task_io_rate_exceeded(int warning,const void * param0,__unused const void * param1)7791 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
7792 {
7793 	if (warning == 0) {
7794 		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
7795 	}
7796 }
7797 
7798 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)7799 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
7800 {
7801 	int                             pid = 0;
7802 	task_t                          task = current_task();
7803 #ifdef EXC_RESOURCE_MONITORS
7804 	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
7805 #endif /* EXC_RESOURCE_MONITORS */
7806 	struct ledger_entry_info        lei;
7807 	kern_return_t                   kr;
7808 
7809 #ifdef MACH_BSD
7810 	pid = proc_selfpid();
7811 #endif
7812 	/*
7813 	 * Get the ledger entry info. We need to do this before disabling the exception
7814 	 * to get correct values for all fields.
7815 	 */
7816 	switch (flavor) {
7817 	case FLAVOR_IO_PHYSICAL_WRITES:
7818 		ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
7819 		break;
7820 	}
7821 
7822 
7823 	/*
7824 	 * Disable the exception notification so we don't overwhelm
7825 	 * the listener with an endless stream of redundant exceptions.
7826 	 * TODO: detect whether another thread is already reporting the violation.
7827 	 */
7828 	uint32_t flags = IOMON_DISABLE;
7829 	task_io_monitor_ctl(task, &flags);
7830 
7831 	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
7832 		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
7833 	}
7834 	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
7835 	    pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
7836 
7837 	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
7838 	if (kr) {
7839 		printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
7840 	}
7841 
7842 #ifdef EXC_RESOURCE_MONITORS
7843 	code[0] = code[1] = 0;
7844 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
7845 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
7846 	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
7847 	EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
7848 	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
7849 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7850 #endif /* EXC_RESOURCE_MONITORS */
7851 }
7852 
7853 void
task_port_space_ast(__unused task_t task)7854 task_port_space_ast(__unused task_t task)
7855 {
7856 	uint32_t current_size, soft_limit, hard_limit;
7857 	assert(task == current_task());
7858 	kern_return_t ret = ipc_space_get_table_size_and_limits(task->itk_space,
7859 	    &current_size, &soft_limit, &hard_limit);
7860 	if (ret == KERN_SUCCESS) {
7861 		SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
7862 	}
7863 }
7864 
7865 #if CONFIG_PROC_RESOURCE_LIMITS
7866 static mach_port_t
task_allocate_fatal_port(void)7867 task_allocate_fatal_port(void)
7868 {
7869 	mach_port_t task_fatal_port = MACH_PORT_NULL;
7870 	task_id_token_t token;
7871 
7872 	kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
7873 	if (kr) {
7874 		return MACH_PORT_NULL;
7875 	}
7876 	task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
7877 	    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
7878 
7879 	task_id_token_set_port(token, task_fatal_port);
7880 
7881 	return task_fatal_port;
7882 }
7883 
7884 static void
task_fatal_port_no_senders(ipc_port_t port,__unused mach_port_mscount_t mscount)7885 task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
7886 {
7887 	task_t task = TASK_NULL;
7888 	kern_return_t kr;
7889 
7890 	task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);
7891 
7892 	assert(token != NULL);
7893 	if (token) {
7894 		kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
7895 		if (task) {
7896 			task_bsdtask_kill(task);
7897 			task_deallocate(task);
7898 		}
7899 		task_id_token_release(token); /* consumes ref given by notification */
7900 	}
7901 }
7902 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7903 
/*
 * Notify (and possibly terminate) a process that has allocated too many
 * mach ports. pid 0 (kernel_task) and pid 1 (launchd) are exempt.
 * soft_limit/hard_limit of zero means the system-wide port-table limit
 * was hit, in which case the process exits with an EXC_RESOURCE
 * port-space exception; otherwise a resource-violation notification is
 * sent (with a fatal port attached for hard-limit violations).
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	__unused kern_return_t kr;
	__unused resource_notify_flags_t flags = kRNFlagsNone;
	__unused uint32_t limit;
	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}
#endif
	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
	    Num of ports allocated %u; \n", procname, pid, current_size);

	/* Abort the process if it has hit the system-wide limit for ipc port table size */
	if (!hard_limit && !soft_limit) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
		EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);

		exit_with_port_space_exception(current_proc(), code[0], code[1]);

		return;
	}

#if CONFIG_PROC_RESOURCE_LIMITS
	if (hard_limit > 0) {
		/* Hard-limit violations carry a fatal port; its no-senders kills the task. */
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			/* Could not create the fatal port: kill the task directly. */
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
	}
	if (task_fatal_port) {
		/* Drop our send right; the receiver now controls the task's fate. */
		ipc_port_release_send(task_fatal_port);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
7967 
/*
 * AST handler run when the current task crosses a file-descriptor limit;
 * forwards to the notification routine when resource limits are built in,
 * otherwise a no-op.
 */
void
task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
{
#if CONFIG_PROC_RESOURCE_LIMITS
	assert(task == current_task());
	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
7976 
7977 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Notify (and possibly terminate) a process that has allocated too many
 * file descriptors. Mirrors the mach-ports variant above: hard-limit
 * violations carry a fatal port whose no-senders notification kills the
 * task; soft-limit violations are advisory.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	kern_return_t kr;
	resource_notify_flags_t flags = kRNFlagsNone;
	int limit;
	mach_port_t task_fatal_port = MACH_PORT_NULL;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}
#endif
	/*
	 * kernel_task (pid 0) and launchd (pid 1) are exempt from these
	 * notifications.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
	    Num of fds allocated %u; \n", procname, pid, current_size);

	if (hard_limit > 0) {
		/* Hard-limit violations carry a fatal port; its no-senders kills the task. */
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			/* Could not create the fatal port: kill the task directly. */
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
	}
	if (task_fatal_port) {
		/* Drop our send right; the receiver now controls the task's fate. */
		ipc_port_release_send(task_fatal_port);
	}
}
8026 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8027 
8028 /* Placeholders for the task set/get voucher interfaces */
8029 kern_return_t
task_get_mach_voucher(task_t task,mach_voucher_selector_t __unused which,ipc_voucher_t * voucher)8030 task_get_mach_voucher(
8031 	task_t                  task,
8032 	mach_voucher_selector_t __unused which,
8033 	ipc_voucher_t           *voucher)
8034 {
8035 	if (TASK_NULL == task) {
8036 		return KERN_INVALID_TASK;
8037 	}
8038 
8039 	*voucher = NULL;
8040 	return KERN_SUCCESS;
8041 }
8042 
/* Placeholder: validates the task and ignores the voucher. */
kern_return_t
task_set_mach_voucher(
	task_t                  task,
	ipc_voucher_t           __unused voucher)
{
	if (TASK_NULL == task) {
		return KERN_INVALID_TASK;
	}

	return KERN_SUCCESS;
}
8054 
/* Unsupported; exists only so the MIG interface resolves. */
kern_return_t
task_swap_mach_voucher(
	__unused task_t         task,
	__unused ipc_voucher_t  new_voucher,
	ipc_voucher_t          *in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	return KERN_NOT_SUPPORTED;
}
8070 
8071 void
task_set_gpu_denied(task_t task,boolean_t denied)8072 task_set_gpu_denied(task_t task, boolean_t denied)
8073 {
8074 	task_lock(task);
8075 
8076 	if (denied) {
8077 		task->t_flags |= TF_GPU_DENIED;
8078 	} else {
8079 		task->t_flags &= ~TF_GPU_DENIED;
8080 	}
8081 
8082 	task_unlock(task);
8083 }
8084 
8085 boolean_t
task_is_gpu_denied(task_t task)8086 task_is_gpu_denied(task_t task)
8087 {
8088 	/* We don't need the lock to read this flag */
8089 	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
8090 }
8091 
8092 
8093 uint64_t
get_task_memory_region_count(task_t task)8094 get_task_memory_region_count(task_t task)
8095 {
8096 	vm_map_t map;
8097 	map = (task == kernel_task) ? kernel_map: task->map;
8098 	return (uint64_t)get_map_nentries(map);
8099 }
8100 
/*
 * Emit the kdebug events describing one dyld image (UUID, load address,
 * fsid, fsobjid). The payload is split across multiple tracepoints since
 * each event carries at most four arguments.
 *
 * NOTE(review): the uuid casts below assume info->uuid is suitably
 * aligned for 64-bit (or 32-bit) loads - confirm against the struct
 * layout in the dyld MIG definitions.
 */
static void
kdebug_trace_dyld_internal(uint32_t base_code,
    struct dyld_kernel_image_info *info)
{
	static_assert(sizeof(info->uuid) >= 16);

#if defined(__LP64__)
	/* 64-bit: the 16-byte UUID fits in two event arguments. */
	uint64_t *uuid = (uint64_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
	    uuid[1], info->load_addr,
	    (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
	    0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
	    (uint64_t)info->fsobjid.fid_objno |
	    ((uint64_t)info->fsobjid.fid_generation << 32),
	    0, 0, 0, 0);
#else /* defined(__LP64__) */
	/* 32-bit: the same payload is spread over three smaller events. */
	uint32_t *uuid = (uint32_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
	    uuid[1], uuid[2], uuid[3], 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
	    (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
	    info->fsobjid.fid_objno, 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
	    info->fsobjid.fid_generation, 0, 0, 0, 0);
#endif /* !defined(__LP64__) */
}
8135 
/*
 * Copy out an array of dyld image infos from a vm_map copy and emit
 * kdebug events for each entry. When dyld UUID tracing is disabled the
 * copy is discarded and KERN_SUCCESS returned.
 */
static kern_return_t
kdebug_trace_dyld(task_t task, uint32_t base_code,
    vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
{
	kern_return_t kr;
	dyld_kernel_image_info_array_t infos;
	vm_map_offset_t map_data;
	vm_offset_t data;

	if (!infos_copy) {
		return KERN_INVALID_ADDRESS;
	}

	if (!kdebug_enable ||
	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
		vm_map_copy_discard(infos_copy);
		return KERN_SUCCESS;
	}

	/*
	 * NOTE(review): infos_copy is not discarded on this error path;
	 * presumably the MIG caller destroys the copy when the routine
	 * fails - confirm against the generated server code.
	 */
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* The copied-out region now lives in ipc_kernel_map; freed below. */
	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);

	for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
		kdebug_trace_dyld_internal(base_code, &(infos[i]));
	}

	data = CAST_DOWN(vm_offset_t, map_data);
	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
	return KERN_SUCCESS;
}
8174 
/*
 * MIG entry: emit DBG_DYLD_UUID_MAP_A kdebug events for each image the
 * calling task registers; the infos array arrives as a vm_map copy.
 */
kern_return_t
task_register_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8183 
/*
 * MIG entry: emit DBG_DYLD_UUID_UNMAP_A kdebug events for each image the
 * calling task unregisters; the infos array arrives as a vm_map copy.
 */
kern_return_t
task_unregister_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8192 
/* Unsupported MIG entry; always returns KERN_NOT_SUPPORTED. */
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
	return KERN_NOT_SUPPORTED;
}
8200 
/*
 * Trace the shared-cache image info for the calling task. Only the
 * current task may register its own shared cache; the no_cache and
 * private_cache hints are ignored.
 */
kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,
    dyld_kernel_image_info_t cache_img,
    __unused boolean_t no_cache,
    __unused boolean_t private_cache)
{
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
	return KERN_SUCCESS;
}
8214 
/* Unsupported MIG entry; always returns KERN_NOT_SUPPORTED. */
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
	return KERN_NOT_SUPPORTED;
}
8221 
/* Unsupported MIG entry; always returns KERN_NOT_SUPPORTED. */
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
	return KERN_NOT_SUPPORTED;
}
8228 
/*
 * Inspect a task's hardware counters (TASK_INSPECT_BASIC_COUNTS).
 * info_out receives the counts and *size_in_out is updated to the number
 * of elements written; KERN_INVALID_ARGUMENT if the buffer is too small
 * or the flavor is unknown. Without MONOTONIC support this returns
 * KERN_NOT_SUPPORTED.
 */
kern_return_t
task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
    task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
{
#if MONOTONIC
	task_t task = (task_t)task_insp;
	kern_return_t kr = KERN_SUCCESS;
	mach_msg_type_number_t size;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	size = *size_in_out;

	switch (flavor) {
	case TASK_INSPECT_BASIC_COUNTS: {
		struct task_inspect_basic_counts *bc;
		uint64_t task_counts[MT_CORE_NFIXED] = { 0 };

		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

		/* NOTE(review): the result of mt_fixed_task_counts is ignored here. */
		mt_fixed_task_counts(task, task_counts);
		bc = (struct task_inspect_basic_counts *)info_out;
#ifdef MT_CORE_INSTRS
		bc->instructions = task_counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
		/* No fixed instructions counter on this hardware. */
		bc->instructions = 0;
#endif /* !defined(MT_CORE_INSTRS) */
		bc->cycles = task_counts[MT_CORE_CYCLES];
		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
		break;
	}
	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	if (kr == KERN_SUCCESS) {
		/* Report how many elements were actually written. */
		*size_in_out = size;
	}
	return kr;
#else /* MONOTONIC */
#pragma unused(task_insp, flavor, info_out, size_in_out)
	return KERN_NOT_SUPPORTED;
#endif /* !MONOTONIC */
}
8279 
8280 #if CONFIG_SECLUDED_MEMORY
/* Count of tasks currently permitted to allocate secluded memory. */
int num_tasks_can_use_secluded_mem = 0;
8282 
/*
 * Grant or revoke a task's permission to use secluded memory, taking the
 * task lock. No-op for tasks that were never marked as eligible
 * (task_could_use_secluded_mem unset).
 */
void
task_set_can_use_secluded_mem(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	if (!task->task_could_use_secluded_mem) {
		return;
	}
	task_lock(task);
	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
	task_unlock(task);
}
8295 
/*
 * Flip task_can_use_secluded_mem with the task lock held, keeping the
 * global num_tasks_can_use_secluded_mem counter in sync. The counter is
 * updated atomically because readers (e.g. task_can_use_secluded_mem)
 * consult it without the task lock.
 */
void
task_set_can_use_secluded_mem_locked(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	assert(task->task_could_use_secluded_mem);
	if (can_use_secluded_mem &&
	    secluded_for_apps &&         /* global boot-arg */
	    !task->task_can_use_secluded_mem) {
		/* transition FALSE -> TRUE: account for one more eligible task */
		assert(num_tasks_can_use_secluded_mem >= 0);
		OSAddAtomic(+1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = TRUE;
	} else if (!can_use_secluded_mem &&
	    task->task_can_use_secluded_mem) {
		/* transition TRUE -> FALSE: drop this task from the count */
		assert(num_tasks_can_use_secluded_mem > 0);
		OSAddAtomic(-1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = FALSE;
	}
}
8317 
8318 void
task_set_could_use_secluded_mem(task_t task,boolean_t could_use_secluded_mem)8319 task_set_could_use_secluded_mem(
8320 	task_t          task,
8321 	boolean_t       could_use_secluded_mem)
8322 {
8323 	task->task_could_use_secluded_mem = !!could_use_secluded_mem;
8324 }
8325 
8326 void
task_set_could_also_use_secluded_mem(task_t task,boolean_t could_also_use_secluded_mem)8327 task_set_could_also_use_secluded_mem(
8328 	task_t          task,
8329 	boolean_t       could_also_use_secluded_mem)
8330 {
8331 	task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
8332 }
8333 
/*
 * Decide whether this task may allocate from the secluded memory pool.
 * Returns TRUE when the task has explicit permission, when it is an
 * opportunistic ("could also") user and at least one task holds explicit
 * permission, or — for allocations only — when a very large task is
 * approaching its footprint limit (secluded shutoff escape hatch).
 */
boolean_t
task_can_use_secluded_mem(
	task_t          task,
	boolean_t       is_alloc)
{
	if (task->task_can_use_secluded_mem) {
		assert(task->task_could_use_secluded_mem);
		assert(num_tasks_can_use_secluded_mem > 0);
		return TRUE;
	}
	/* opportunistic users ride along only while an explicit user exists */
	if (task->task_could_also_use_secluded_mem &&
	    num_tasks_can_use_secluded_mem > 0) {
		assert(num_tasks_can_use_secluded_mem > 0);
		return TRUE;
	}

	/*
	 * If a single task is using more than some large amount of
	 * memory (i.e. secluded_shutoff_trigger) and is approaching
	 * its task limit, allow it to dip into secluded and begin
	 * suppression of rebuilding secluded memory until that task exits.
	 */
	if (is_alloc && secluded_shutoff_trigger != 0) {
		uint64_t phys_used = get_task_phys_footprint(task);
		uint64_t limit = get_task_phys_footprint_limit(task);
		if (phys_used > secluded_shutoff_trigger &&
		    limit > secluded_shutoff_trigger &&
		    phys_used > limit - secluded_shutoff_headroom) {
			start_secluded_suppression(task);
			return TRUE;
		}
	}

	return FALSE;
}
8369 
8370 boolean_t
task_could_use_secluded_mem(task_t task)8371 task_could_use_secluded_mem(
8372 	task_t  task)
8373 {
8374 	return task->task_could_use_secluded_mem;
8375 }
8376 
8377 boolean_t
task_could_also_use_secluded_mem(task_t task)8378 task_could_also_use_secluded_mem(
8379 	task_t  task)
8380 {
8381 	return task->task_could_also_use_secluded_mem;
8382 }
8383 #endif /* CONFIG_SECLUDED_MEMORY */
8384 
8385 queue_head_t *
task_io_user_clients(task_t task)8386 task_io_user_clients(task_t task)
8387 {
8388 	return &task->io_user_clients;
8389 }
8390 
/*
 * Record whether this task wants app-suspension notification messages.
 * Single boolean store; no lock is taken here.
 */
void
task_set_message_app_suspended(task_t task, boolean_t enable)
{
	task->message_app_suspended = enable;
}
8396 
/*
 * Copy the per-task fields that survive an exec from the old task image
 * to the new one. Currently only the virtual-timer state carries over.
 */
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
	dst_task->vtimers = src_task->vtimers;
}
8402 
8403 #if DEVELOPMENT || DEBUG
8404 int vm_region_footprint = 0;
8405 #endif /* DEVELOPMENT || DEBUG */
8406 
/*
 * Should vm_region-style queries for the current task report
 * footprint-oriented numbers? Honors the system-wide debug override
 * (vm_region_footprint) before the per-task flag.
 */
boolean_t
task_self_region_footprint(void)
{
#if DEVELOPMENT || DEBUG
	if (vm_region_footprint) {
		/* system-wide override */
		return TRUE;
	}
#endif /* DEVELOPMENT || DEBUG */
	return current_task()->task_region_footprint;
}
8418 
8419 void
task_self_region_footprint_set(boolean_t newval)8420 task_self_region_footprint_set(
8421 	boolean_t newval)
8422 {
8423 	task_t  curtask;
8424 
8425 	curtask = current_task();
8426 	task_lock(curtask);
8427 	if (newval) {
8428 		curtask->task_region_footprint = TRUE;
8429 	} else {
8430 		curtask->task_region_footprint = FALSE;
8431 	}
8432 	task_unlock(curtask);
8433 }
8434 
8435 void
task_set_darkwake_mode(task_t task,boolean_t set_mode)8436 task_set_darkwake_mode(task_t task, boolean_t set_mode)
8437 {
8438 	assert(task);
8439 
8440 	task_lock(task);
8441 
8442 	if (set_mode) {
8443 		task->t_flags |= TF_DARKWAKE_MODE;
8444 	} else {
8445 		task->t_flags &= ~(TF_DARKWAKE_MODE);
8446 	}
8447 
8448 	task_unlock(task);
8449 }
8450 
8451 boolean_t
task_get_darkwake_mode(task_t task)8452 task_get_darkwake_mode(task_t task)
8453 {
8454 	assert(task);
8455 	return (task->t_flags & TF_DARKWAKE_MODE) != 0;
8456 }
8457 
8458 /*
8459  * Set default behavior for task's control port and EXC_GUARD variants that have
8460  * settable behavior.
8461  *
8462  * Platform binaries typically have one behavior, third parties another -
8463  * but there are special exception we may need to account for.
8464  */
void
task_set_exc_guard_ctrl_port_default(
	task_t task,
	thread_t main_thread,
	const char *name,
	unsigned int namelen,
	boolean_t is_simulated,
	uint32_t platform,
	uint32_t sdk)
{
	if (task->t_flags & TF_PLATFORM) {
		/* set exc guard default behavior for first-party code */
		task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);

		if (1 == task_pid(task)) {
			/* special flags for inittask - delivery every instance as corpse */
			task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
		} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
			/* honor by-name default setting overrides */

			int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);

			for (int i = 0; i < count; i++) {
				const struct task_exc_guard_named_default *named_default =
				    &task_exc_guard_named_defaults[i];
				/* exact-name match: same length and same leading bytes */
				if (strncmp(named_default->name, name, namelen) == 0 &&
				    strlen(named_default->name) == namelen) {
					task->task_exc_guard = named_default->behavior;
					break;
				}
			}
		}

		/* set control port options for 1p code, inherited from parent task by default */
		task->task_control_port_options = (ipc_control_port_options & ICP_OPTIONS_1P_MASK);
	} else {
		/* set exc guard default behavior for third-party code */
		task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
		/* set control port options for 3p code, inherited from parent task by default */
		task->task_control_port_options = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
	}

	if (is_simulated) {
		/* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
		if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
			task->task_exc_guard = TASK_EXC_GUARD_NONE;
		}
		/* Disable protection for control ports for simulated binaries */
		task->task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
	}

	/* apply the chosen control-port policy to the task and main thread */
	task_set_immovable_pinned(task);
	main_thread_set_immovable_pinned(main_thread);
}
8521 
8522 kern_return_t
task_get_exc_guard_behavior(task_t task,task_exc_guard_behavior_t * behaviorp)8523 task_get_exc_guard_behavior(
8524 	task_t task,
8525 	task_exc_guard_behavior_t *behaviorp)
8526 {
8527 	if (task == TASK_NULL) {
8528 		return KERN_INVALID_TASK;
8529 	}
8530 	*behaviorp = task->task_exc_guard;
8531 	return KERN_SUCCESS;
8532 }
8533 
/*
 * Change the task's EXC_GUARD delivery behavior.
 * Rejects bits outside TASK_EXC_GUARD_ALL, then clamps the request to
 * what this configuration allows. On release kernels the update is done
 * with an atomic read-modify-write loop that refuses any transition
 * which would weaken protection (clearing a no-unset bit or setting a
 * no-set bit yields KERN_DENIED).
 */
kern_return_t
task_set_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t new_behavior)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
		return KERN_INVALID_VALUE;
	}

	/* limit setting to that allowed for this config */
	new_behavior = new_behavior & task_exc_guard_config_mask;

#if !defined (DEBUG) && !defined (DEVELOPMENT)
	/* On release kernels, only allow _upgrading_ exc guard behavior */
	task_exc_guard_behavior_t cur_behavior;

	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
		/* refuse to clear any currently-set "may not unset" bit */
		if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* refuse to set any "may not set" bit that is not already set */
		if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* no restrictions on CORPSE bit */
	});
#else
	task->task_exc_guard = new_behavior;
#endif
	return KERN_SUCCESS;
}
8569 
/*
 * Enable or disable corpse forking for a task (memory-diagnostics aid).
 * Only available on DEVELOPMENT/DEBUG kernels; release kernels return
 * KERN_NOT_SUPPORTED.
 */
kern_return_t
task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
{
#if DEVELOPMENT || DEBUG
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}

	task_lock(task);
	if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
		task->t_flags |= TF_NO_CORPSE_FORKING;
	} else {
		task->t_flags &= ~TF_NO_CORPSE_FORKING;
	}
	task_unlock(task);

	return KERN_SUCCESS;
#else
	(void)task;
	(void)behavior;
	return KERN_NOT_SUPPORTED;
#endif
}
8593 
8594 boolean_t
task_corpse_forking_disabled(task_t task)8595 task_corpse_forking_disabled(task_t task)
8596 {
8597 	boolean_t disabled = FALSE;
8598 
8599 	task_lock(task);
8600 	disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
8601 	task_unlock(task);
8602 
8603 	return disabled;
8604 }
8605 
8606 #if __arm64__
8607 extern int legacy_footprint_entitlement_mode;
8608 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
8609 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
8610 
8611 
/*
 * Mark the task as entitled to the legacy (pre-footprint-accounting)
 * memory footprint behavior. One-way: there is no API to clear it.
 */
void
task_set_legacy_footprint(
	task_t task)
{
	task_lock(task);
	task->task_legacy_footprint = TRUE;
	task_unlock(task);
}
8620 
/*
 * Apply the "extra footprint limit" entitlement to the task, at most
 * once. Uses a double-checked pattern: a cheap unlocked test, then a
 * re-check under the task lock so concurrent callers cannot both notify
 * memorystatus. The memorystatus call is made after dropping the lock.
 */
void
task_set_extra_footprint_limit(
	task_t task)
{
	if (task->task_extra_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check under the lock: another thread may have won the race */
	if (task->task_extra_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_extra_footprint_limit = TRUE;
	task_unlock(task);
	memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
}
8637 
/*
 * Apply the iOS 13 "extended footprint" entitlement to the task, at
 * most once. Same double-checked locking shape as
 * task_set_extra_footprint_limit; the memorystatus notification is
 * issued exactly once, after the lock is dropped.
 */
void
task_set_ios13extended_footprint_limit(
	task_t task)
{
	if (task->task_ios13extended_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check under the lock: another thread may have won the race */
	if (task->task_ios13extended_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_ios13extended_footprint_limit = TRUE;
	task_unlock(task);
	memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
}
8654 #endif /* __arm64__ */
8655 
8656 static inline ledger_amount_t
task_ledger_get_balance(ledger_t ledger,int ledger_idx)8657 task_ledger_get_balance(
8658 	ledger_t        ledger,
8659 	int             ledger_idx)
8660 {
8661 	ledger_amount_t amount;
8662 	amount = 0;
8663 	ledger_get_balance(ledger, ledger_idx, &amount);
8664 	return amount;
8665 }
8666 
8667 /*
8668  * Gather the amount of memory counted in a task's footprint due to
8669  * being in a specific set of ledgers.
8670  */
void
task_ledgers_footprint(
	ledger_t        ledger,
	ledger_amount_t *ledger_resident,
	ledger_amount_t *ledger_compressed)
{
	/* accumulate from zero; each tagged ledger pair adds one
	 * resident and one compressed component */
	*ledger_resident = 0;
	*ledger_compressed = 0;

	/* purgeable non-volatile memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);

	/* "default" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);

	/* "network" currently never counts in the footprint... */

	/* "media" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);

	/* "graphics" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);

	/* "neural" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
}
8702 
8703 #if CONFIG_MEMORYSTATUS
8704 /*
8705  * Credit any outstanding task dirty time to the ledger.
8706  * memstat_dirty_start is pushed forward to prevent any possibility of double
8707  * counting, making it safe to call this as often as necessary to ensure that
8708  * anyone reading the ledger gets up-to-date information.
8709  */
void
task_ledger_settle_dirty_time(task_t t)
{
	task_lock(t);

	/* memstat_dirty_start == 0 means the task is not currently dirty */
	uint64_t start = t->memstat_dirty_start;
	if (start) {
		uint64_t now = mach_absolute_time();

		uint64_t duration;
		absolutetime_to_nanoseconds(now - start, &duration);

		ledger_t ledger = get_task_ledger(t);
		ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);

		/* push the start marker forward so this interval is never
		 * credited twice */
		t->memstat_dirty_start = now;
	}

	task_unlock(t);
}
8730 #endif /* CONFIG_MEMORYSTATUS */
8731 
8732 void
task_set_memory_ownership_transfer(task_t task,boolean_t value)8733 task_set_memory_ownership_transfer(
8734 	task_t    task,
8735 	boolean_t value)
8736 {
8737 	task_lock(task);
8738 	task->task_can_transfer_memory_ownership = !!value;
8739 	task_unlock(task);
8740 }
8741 
8742 #if DEVELOPMENT || DEBUG
8743 
8744 void
task_set_no_footprint_for_debug(task_t task,boolean_t value)8745 task_set_no_footprint_for_debug(task_t task, boolean_t value)
8746 {
8747 	task_lock(task);
8748 	task->task_no_footprint_for_debug = !!value;
8749 	task_unlock(task);
8750 }
8751 
8752 int
task_get_no_footprint_for_debug(task_t task)8753 task_get_no_footprint_for_debug(task_t task)
8754 {
8755 	return task->task_no_footprint_for_debug;
8756 }
8757 
8758 #endif /* DEVELOPMENT || DEBUG */
8759 
/*
 * Snapshot the task's owned VM objects under the objq lock.
 *
 * With query == NULL, only counts the owned objects into *num.
 * Otherwise fills query (capacity len bytes) with one
 * vm_object_query_data_t per object, stopping before overrunning the
 * buffer, and returns the number of entries written in *num.
 */
void
task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
{
	vm_object_t find_vmo;
	size_t size = 0;

	task_objq_lock(task);
	if (query != NULL) {
		queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
		{
			vm_object_query_t p = &query[size++];

			/* make sure to not overrun */
			if (size * sizeof(vm_object_query_data_t) > len) {
				--size;
				break;
			}

			bzero(p, sizeof(*p));
			/* obfuscated object identity, stable for this boot */
			p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
			p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
			p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
			p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
			p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
			p->vo_no_footprint = find_vmo->vo_no_footprint;
			p->vo_ledger_tag = find_vmo->vo_ledger_tag;
			p->purgable = find_vmo->purgable;

			/* compressed size is only meaningful once a pager exists */
			if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
				p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
			} else {
				p->compressed_size = 0;
			}
		}
	} else {
		/* count-only mode */
		size = (size_t)task->task_owned_objects;
	}
	task_objq_unlock(task);

	*num = size;
}
8801 
/*
 * Two-phase query for a task's owned VM objects.
 *
 * With buffer_size == 0: size probe — returns the entry count in
 * *entries and the byte size a full buffer would need in *output_size
 * (0 when the task owns nothing).
 * Otherwise: fills buffer (header + entries) and reports the bytes and
 * entry count actually produced.
 */
void
task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
{
	assert(output_size);
	assert(entries);

	/* copy the vmobjects and vmobject data out of the task */
	if (buffer_size == 0) {
		task_copy_vmobjects(task, NULL, 0, entries);
		/* required size = entries plus the list header */
		*output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
	} else {
		assert(buffer);
		task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
		buffer->entries = (uint64_t)*entries;
		*output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
	}
}
8819 
/*
 * Capture from_task's owned-VM-object list and attach it to to_task
 * (used when generating a corpse). Best effort: on allocation failure
 * or an empty list, to_task is left unchanged.
 */
void
task_store_owned_vmobject_info(task_t to_task, task_t from_task)
{
	size_t buffer_size;
	vmobject_list_output_t buffer;
	size_t output_size;
	size_t entries;

	assert(to_task != from_task);

	/* get the size, allocate a buffer, and populate */
	entries = 0;
	output_size = 0;
	task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);

	if (output_size) {
		buffer_size = output_size;
		buffer = kalloc_data(buffer_size, Z_WAITOK);

		if (buffer) {
			entries = 0;
			output_size = 0;

			/* second pass: actually copy the list */
			task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);

			/* NOTE(review): if entries == 0 here, buffer is not
			 * attached and appears to be left allocated — verify
			 * ownership/cleanup expectations with callers. */
			if (entries) {
				to_task->corpse_vmobject_list = buffer;
				to_task->corpse_vmobject_list_size = buffer_size;
			}
		}
	}
}
8852 
8853 void
task_set_filter_msg_flag(task_t task,boolean_t flag)8854 task_set_filter_msg_flag(
8855 	task_t task,
8856 	boolean_t flag)
8857 {
8858 	assert(task != TASK_NULL);
8859 
8860 	task_lock(task);
8861 	if (flag) {
8862 		task->t_flags |= TF_FILTER_MSG;
8863 	} else {
8864 		task->t_flags &= ~TF_FILTER_MSG;
8865 	}
8866 	task_unlock(task);
8867 }
8868 
8869 boolean_t
task_get_filter_msg_flag(task_t task)8870 task_get_filter_msg_flag(
8871 	task_t task)
8872 {
8873 	uint32_t flags = 0;
8874 
8875 	if (!task) {
8876 		return false;
8877 	}
8878 
8879 	flags = os_atomic_load(&task->t_flags, relaxed);
8880 	return (flags & TF_FILTER_MSG) ? TRUE : FALSE;
8881 }
8882 bool
task_is_exotic(task_t task)8883 task_is_exotic(
8884 	task_t task)
8885 {
8886 	if (task == TASK_NULL) {
8887 		return false;
8888 	}
8889 	return vm_map_is_exotic(get_task_map(task));
8890 }
8891 
8892 bool
task_is_alien(task_t task)8893 task_is_alien(
8894 	task_t task)
8895 {
8896 	if (task == TASK_NULL) {
8897 		return false;
8898 	}
8899 	return vm_map_is_alien(get_task_map(task));
8900 }
8901 
8902 
8903 
8904 #if CONFIG_MACF
8905 /* Set the filter mask for Mach traps. */
/*
 * MACF entry point: install the Mach trap filter mask on a task.
 * Thin wrapper over task_set_mach_trap_filter_mask().
 */
void
mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_trap_filter_mask(task, maskptr);
}
8913 
8914 /* Set the filter mask for kobject msgs. */
/*
 * MACF entry point: install the kobject message filter mask on a task.
 * Thin wrapper over task_set_mach_kobj_filter_mask().
 */
void
mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_kobj_filter_mask(task, maskptr);
}
8922 
8923 /* Hook for mach trap/sc filter evaluation policy. */
8924 mac_task_mach_filter_cbfunc_t mac_task_mach_trap_evaluate = NULL;
8925 
8926 /* Hook for kobj message filter evaluation policy. */
8927 mac_task_kobj_filter_cbfunc_t mac_task_kobj_msg_evaluate = NULL;
8928 
8929 /* Set the callback hooks for the filtering policy. */
8930 int
mac_task_register_filter_callbacks(const mac_task_mach_filter_cbfunc_t mach_cbfunc,const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)8931 mac_task_register_filter_callbacks(
8932 	const mac_task_mach_filter_cbfunc_t mach_cbfunc,
8933 	const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
8934 {
8935 	if (mach_cbfunc != NULL) {
8936 		if (mac_task_mach_trap_evaluate != NULL) {
8937 			return KERN_FAILURE;
8938 		}
8939 		mac_task_mach_trap_evaluate = mach_cbfunc;
8940 	}
8941 	if (kobj_cbfunc != NULL) {
8942 		if (mac_task_kobj_msg_evaluate != NULL) {
8943 			return KERN_FAILURE;
8944 		}
8945 		mac_task_kobj_msg_evaluate = kobj_cbfunc;
8946 	}
8947 
8948 	return KERN_SUCCESS;
8949 }
8950 #endif /* CONFIG_MACF */
8951 
/*
 * Propagate message-filtering state from old_task to new_task
 * (used across exec/corpse creation).
 */
void
task_transfer_mach_filter_bits(
	task_t new_task,
	task_t old_task)
{
#ifdef CONFIG_MACF
	/* NOTE(review): other MACF guards in this file use #if CONFIG_MACF;
	 * #ifdef here differs if CONFIG_MACF is ever defined as 0 — confirm. */
	/* Copy mach trap and kernel object mask pointers to new task. */
	task_copy_filter_masks(new_task, old_task);
#endif
	/* If filter message flag is set then set it in the new task. */
	if (task_get_filter_msg_flag(old_task)) {
		new_task->t_flags |= TF_FILTER_MSG;
	}
}
8966 
8967 
8968 #if __has_feature(ptrauth_calls)
8969 /* All pac violations will be delivered as fatal exceptions irrespective of
8970  * the enable_pac_exception boot-arg value.
8971  */
8972 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
8973 /*
8974  * When enable_pac_exception boot-arg is set to true, processes
8975  * can choose to get non-fatal pac exception delivery by setting
8976  * this entitlement.
8977  */
8978 #define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
8979 
/*
 * Decide whether PAC violations should be fatal for this task and, if
 * so, set TF_PAC_EXC_FATAL. Fatal when the task holds the PAC exception
 * entitlement, or when the enable_pac_exception boot-arg is on and the
 * task is a platform binary — unless the boot-arg is on and the task
 * opts out via the skip entitlement.
 */
void
task_set_pac_exception_fatal_flag(
	task_t task)
{
	assert(task != TASK_NULL);
	bool pac_entitlement = false;

	/* boot-arg enabled + skip entitlement: leave the flag clear */
	if (enable_pac_exception && IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
		return;
	}

	if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT)) {
		pac_entitlement = true;
	}

	task_lock(task);
	if (pac_entitlement || (enable_pac_exception && task->t_flags & TF_PLATFORM)) {
		task->t_flags |= TF_PAC_EXC_FATAL;
	}
	task_unlock(task);
}
9001 
9002 bool
task_is_pac_exception_fatal(task_t task)9003 task_is_pac_exception_fatal(
9004 	task_t task)
9005 {
9006 	uint32_t flags = 0;
9007 
9008 	assert(task != TASK_NULL);
9009 
9010 	flags = os_atomic_load(&task->t_flags, relaxed);
9011 	return (bool)(flags & TF_PAC_EXC_FATAL);
9012 }
9013 #endif /* __has_feature(ptrauth_calls) */
9014 
/*
 * Enable TECS (CPU vulnerability mitigation via machine_tecs) for the
 * task — the current task if TASK_NULL is passed — and apply it to
 * every existing thread. No-op if the machine does not require the
 * CPUVN_CI mitigation. Must be called without the task lock held.
 */
void
task_set_tecs(task_t task)
{
	if (task == TASK_NULL) {
		task = current_task();
	}

	/* mitigation not applicable on this hardware */
	if (!machine_csv(CPUVN_CI)) {
		return;
	}

	LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);

	task_lock(task);

	task->t_flags |= TF_TECS;

	/* existing threads must each be updated; new threads inherit TF_TECS */
	thread_t thread;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		machine_tecs(thread);
	}
	task_unlock(task);
}
9038 
/*
 * Test-only (DEVELOPMENT/DEBUG): perform a synchronous kernel upcall on
 * the given send port, blocking until it completes, then release the
 * send right. Only the current task may call this on itself. Release
 * kernels return KERN_NOT_SUPPORTED.
 */
kern_return_t
task_test_sync_upcall(
	task_t     task,
	ipc_port_t send_port)
{
#if DEVELOPMENT || DEBUG
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Block on sync kernel upcall on the given send port */
	mach_test_sync_upcall(send_port);

	/* consume the send right passed in by MIG */
	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	return KERN_NOT_SUPPORTED;
#endif
}
9060 
9061 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Allocate the current task's resource-limit fatal port and copy a send
 * right for it into the current IPC space. Returns the resulting port
 * name, or 0 (MACH_PORT_NULL name) if the port could not be allocated.
 */
mach_port_name_t
current_task_get_fatal_port_name(void)
{
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_port_name_t port_name = 0;

	task_fatal_port = task_allocate_fatal_port();

	if (task_fatal_port) {
		/* move the send right into the caller's space; port_name is
		 * filled in on success */
		ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
		    IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
	}

	return port_name;
}
9077 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
9078 
9079 #if defined(__x86_64__)
9080 bool
curtask_get_insn_copy_optout(void)9081 curtask_get_insn_copy_optout(void)
9082 {
9083 	bool optout;
9084 	task_t cur_task = current_task();
9085 
9086 	task_lock(cur_task);
9087 	optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9088 	task_unlock(cur_task);
9089 
9090 	return optout;
9091 }
9092 
/*
 * x86_64 only: opt the current task out of instruction-stream copying.
 * Sets TF_INSN_COPY_OPTOUT and pushes the setting to every existing
 * thread; one-way (no corresponding clear routine here).
 */
void
curtask_set_insn_copy_optout(void)
{
	task_t cur_task = current_task();

	task_lock(cur_task);

	cur_task->t_flags |= TF_INSN_COPY_OPTOUT;

	/* existing threads each need the machine-level update applied */
	thread_t thread;
	queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
		machine_thread_set_insn_copy_optout(thread);
	}
	task_unlock(cur_task);
}
9108 #endif /* defined(__x86_64__) */
9109 
9110 void
task_get_corpse_vmobject_list(task_t task,vmobject_list_output_t * list,size_t * list_size)9111 task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9112 {
9113 	assert(task);
9114 	assert(list_size);
9115 
9116 	*list = task->corpse_vmobject_list;
9117 	*list_size = (size_t)task->corpse_vmobject_list_size;
9118 }
9119 
/*
 * Panic helper for task_get_ro(): the proc_ro structure's task
 * back-reference does not point at the task that claims to own it,
 * which indicates corruption of the read-only proc data.
 */
__abortlike
static void
panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
{
	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
	    "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
}
9127 
/*
 * Return the task's read-only proc structure, with integrity checks:
 * the pointer must live in the read-only proc_ro zone and its task
 * back-reference must point back at this task. Panics on mismatch.
 */
proc_ro_t
task_get_ro(task_t t)
{
	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;

	/* ensure ro really is a ZONE_ID_PROC_RO element */
	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(proc_ro_task(ro) != t)) {
		panic_proc_ro_task_backref_mismatch(t, ro);
	}

	return ro;
}
9140