1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to [email protected] any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100 #include <mach/mach_test_upcall.h>
101
102 #include <ipc/ipc_importance.h>
103 #include <ipc/ipc_types.h>
104 #include <ipc/ipc_space.h>
105 #include <ipc/ipc_entry.h>
106 #include <ipc/ipc_hash.h>
107 #include <ipc/ipc_init.h>
108
109 #include <kern/kern_types.h>
110 #include <kern/mach_param.h>
111 #include <kern/misc_protos.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/coalition.h>
115 #include <kern/zalloc.h>
116 #include <kern/kalloc.h>
117 #include <kern/kern_cdata.h>
118 #include <kern/processor.h>
119 #include <kern/recount.h>
120 #include <kern/sched_prim.h> /* for thread_wakeup */
121 #include <kern/ipc_tt.h>
122 #include <kern/host.h>
123 #include <kern/clock.h>
124 #include <kern/timer.h>
125 #include <kern/assert.h>
126 #include <kern/affinity.h>
127 #include <kern/exc_resource.h>
128 #include <kern/machine.h>
129 #include <kern/policy_internal.h>
130 #include <kern/restartable.h>
131 #include <kern/ipc_kobject.h>
132
133 #include <corpses/task_corpse.h>
134 #if CONFIG_TELEMETRY
135 #include <kern/telemetry.h>
136 #endif
137
138 #if CONFIG_PERVASIVE_CPI
139 #include <kern/monotonic.h>
140 #include <machine/monotonic.h>
141 #endif /* CONFIG_PERVASIVE_CPI */
142
143 #if CONFIG_EXCLAVES
144 #include "exclaves_boot.h"
145 #include "exclaves_resource.h"
146 #include "exclaves_boot.h"
147 #include "kern/exclaves.tightbeam.h"
148 #endif /* CONFIG_EXCLAVES */
149
150 #include <os/log.h>
151
152 #include <vm/pmap.h>
153 #include <vm/vm_map.h>
154 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
155 #include <vm/vm_pageout.h>
156 #include <vm/vm_protos.h>
157 #include <vm/vm_purgeable_internal.h>
158 #include <vm/vm_compressor_pager.h>
159 #include <vm/vm_reclaim_internal.h>
160
161 #include <sys/proc_ro.h>
162 #include <sys/resource.h>
163 #include <sys/signalvar.h> /* for coredump */
164 #include <sys/bsdtask_info.h>
165 #include <sys/kdebug_triage.h>
166 #include <sys/code_signing.h> /* for address_space_debugged */
167 /*
168 * Exported interfaces
169 */
170
171 #include <mach/task_server.h>
172 #include <mach/mach_host_server.h>
173 #include <mach/mach_port_server.h>
174
175 #include <vm/vm_shared_region.h>
176
177 #include <libkern/OSDebug.h>
178 #include <libkern/OSAtomic.h>
179 #include <libkern/section_keywords.h>
180
181 #include <mach-o/loader.h>
182 #include <kdp/kdp_dyld.h>
183
184 #include <kern/sfi.h> /* picks up ledger.h */
185
186 #if CONFIG_MACF
187 #include <security/mac_mach_internal.h>
188 #endif
189
190 #include <IOKit/IOBSD.h>
191 #include <kdp/processor_core.h>
192
193 #include <string.h>
194
195 #if KPERF
196 extern int kpc_force_all_ctrs(task_t, int);
197 #endif
198
199 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
200
201 int64_t next_taskuniqueid = 0;
202 const size_t task_alignment = _Alignof(struct task);
203 extern const size_t proc_alignment;
204 extern size_t proc_struct_size;
205 extern size_t proc_and_task_size;
206 size_t task_struct_size;
207
208 extern uint32_t ipc_control_port_options;
209
210 extern int large_corpse_count;
211
212 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
213 extern void task_disown_frozen_csegs(task_t owner_task);
214
215 static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
216 static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
217 static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
218 static inline void task_zone_init(void);
219
220 #if CONFIG_EXCLAVES
221 static bool task_should_panic_on_exit_due_to_conclave_taint(task_t task);
222 static bool task_is_conclave_tainted(task_t task);
223 static void task_set_conclave_taint(task_t task);
224 kern_return_t task_crash_info_conclave_upcall(task_t task,
225 const xnuupcalls_conclavesharedbuffer_s *shared_buf, uint32_t length);
226 kern_return_t
227 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *_Nonnull result, void *kcdata_ptr);
228 #endif /* CONFIG_EXCLAVES */
229
230 IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
231 IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
232 .iko_op_no_senders = task_port_no_senders);
233 IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
234 .iko_op_no_senders = task_port_with_flavor_no_senders);
235 IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
236 .iko_op_no_senders = task_port_with_flavor_no_senders);
237 IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
238 .iko_op_no_senders = task_suspension_no_senders);
239
240 #if CONFIG_PROC_RESOURCE_LIMITS
241 static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
242 static mach_port_t task_allocate_fatal_port(void);
243
244 IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
245 .iko_op_stable = true,
246 .iko_op_no_senders = task_fatal_port_no_senders);
247
248 extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
249 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
250
251 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
252 int audio_active = 0;
253
254 /*
255 * structure for tracking zone usage
256 * Used either one per task/thread for all zones or <per-task,per-zone>.
257 */
258 typedef struct zinfo_usage_store_t {
259 /* These fields may be updated atomically, and so must be 8 byte aligned */
260 uint64_t alloc __attribute__((aligned(8))); /* allocation counter */
261 uint64_t free __attribute__((aligned(8))); /* free counter */
262 } zinfo_usage_store_t;
263
264 /**
265 * Return codes related to diag threshold and memory limit
266 */
267 __options_decl(diagthreshold_check_return, int, {
268 THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED = 0,
269 THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED = 1,
270 THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED = 2,
271 THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED = 3,
272 });
273
274 /**
275 * Return codes related to diag threshold and memory limit
276 */
277 __options_decl(current_, int, {
278 THRESHOLD_IS_SAME_AS_LIMIT = 0,
279 THRESHOLD_IS_NOT_SAME_AS_LIMIT = 1
280 });
281
282 zinfo_usage_store_t tasks_tkm_private;
283 zinfo_usage_store_t tasks_tkm_shared;
284
285 /* A container to accumulate statistics for expired tasks */
286 expired_task_statistics_t dead_task_statistics;
287 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
288
289 ledger_template_t task_ledger_template = NULL;
290
291 /* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
292 LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
293 LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
294
295 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
296 {.cpu_time = -1,
297 .tkm_private = -1,
298 .tkm_shared = -1,
299 .phys_mem = -1,
300 .wired_mem = -1,
301 .internal = -1,
302 .iokit_mapped = -1,
303 .external = -1,
304 .reusable = -1,
305 .alternate_accounting = -1,
306 .alternate_accounting_compressed = -1,
307 .page_table = -1,
308 .phys_footprint = -1,
309 .internal_compressed = -1,
310 .purgeable_volatile = -1,
311 .purgeable_nonvolatile = -1,
312 .purgeable_volatile_compressed = -1,
313 .purgeable_nonvolatile_compressed = -1,
314 .tagged_nofootprint = -1,
315 .tagged_footprint = -1,
316 .tagged_nofootprint_compressed = -1,
317 .tagged_footprint_compressed = -1,
318 .network_volatile = -1,
319 .network_nonvolatile = -1,
320 .network_volatile_compressed = -1,
321 .network_nonvolatile_compressed = -1,
322 .media_nofootprint = -1,
323 .media_footprint = -1,
324 .media_nofootprint_compressed = -1,
325 .media_footprint_compressed = -1,
326 .graphics_nofootprint = -1,
327 .graphics_footprint = -1,
328 .graphics_nofootprint_compressed = -1,
329 .graphics_footprint_compressed = -1,
330 .neural_nofootprint = -1,
331 .neural_footprint = -1,
332 .neural_nofootprint_compressed = -1,
333 .neural_footprint_compressed = -1,
334 .platform_idle_wakeups = -1,
335 .interrupt_wakeups = -1,
336 #if CONFIG_SCHED_SFI
337 .sfi_wait_times = { 0 /* initialized at runtime */},
338 #endif /* CONFIG_SCHED_SFI */
339 .cpu_time_billed_to_me = -1,
340 .cpu_time_billed_to_others = -1,
341 .physical_writes = -1,
342 .logical_writes = -1,
343 .logical_writes_to_external = -1,
344 #if DEBUG || DEVELOPMENT
345 .pages_grabbed = -1,
346 .pages_grabbed_kern = -1,
347 .pages_grabbed_iopl = -1,
348 .pages_grabbed_upl = -1,
349 #endif
350 #if CONFIG_FREEZE
351 .frozen_to_swap = -1,
352 #endif /* CONFIG_FREEZE */
353 .energy_billed_to_me = -1,
354 .energy_billed_to_others = -1,
355 #if CONFIG_PHYS_WRITE_ACCT
356 .fs_metadata_writes = -1,
357 #endif /* CONFIG_PHYS_WRITE_ACCT */
358 #if CONFIG_MEMORYSTATUS
359 .memorystatus_dirty_time = -1,
360 #endif /* CONFIG_MEMORYSTATUS */
361 .swapins = -1,
362 .conclave_mem = -1, };
363
364 /* System sleep state */
365 boolean_t tasks_suspend_state;
366
367 __options_decl(send_exec_resource_is_fatal, bool, {
368 IS_NOT_FATAL = false,
369 IS_FATAL = true
370 });
371
372 __options_decl(send_exec_resource_is_diagnostics, bool, {
373 IS_NOT_DIAGNOSTICS = false,
374 IS_DIAGNOSTICS = true
375 });
376
377 __options_decl(send_exec_resource_is_warning, bool, {
378 IS_NOT_WARNING = false,
379 IS_WARNING = true
380 });
381
382 __options_decl(send_exec_resource_options_t, uint8_t, {
383 EXEC_RESOURCE_FATAL = 0x01,
384 EXEC_RESOURCE_DIAGNOSTIC = 0x02,
385 EXEC_RESOURCE_WARNING = 0x04,
386 });
387
388 /**
389 * Actions to take when a process has reached the memory limit or the diagnostics threshold limits
390 */
391 static inline void task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning);
392 #if DEBUG || DEVELOPMENT
393 static inline void task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size);
394 #endif
395 void init_task_ledgers(void);
396 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
397 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
398 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
399 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
400 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options);
401 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
402 #if CONFIG_PROC_RESOURCE_LIMITS
403 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
404 mach_port_name_t current_task_get_fatal_port_name(void);
405 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit);
406 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
407
408 kern_return_t task_suspend_internal_locked(task_t);
409 kern_return_t task_suspend_internal(task_t);
410 kern_return_t task_resume_internal_locked(task_t);
411 kern_return_t task_resume_internal(task_t);
412 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
413
414 extern kern_return_t iokit_task_terminate(task_t task, int phase);
415 extern void iokit_task_app_suspended_changed(task_t task);
416
417 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
418 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
419 extern kern_return_t thread_resume(thread_t thread);
420
421 extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
422
423 // Condition to include diag footprints
424 #define RESETTABLE_DIAG_FOOTPRINT_LIMITS ((DEBUG || DEVELOPMENT) && CONFIG_MEMORYSTATUS)
425
426 // Warn tasks when they hit 80% of their memory limit.
427 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
428
429 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
430 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
431
432 /*
433 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
434 *
435 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
436 * stacktraces, aka micro-stackshots)
437 */
438 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
439
440 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
441 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
442
443 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
444
445 TUNABLE(bool, disable_exc_resource, "disable_exc_resource", false); /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
446 TUNABLE(bool, disable_exc_resource_during_audio, "disable_exc_resource_during_audio", true); /* Global override to suppress EXC_RESOURCE while audio is active */
447
448 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
449 unsigned int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
450
451 /*
452 * Configure per-task memory limit.
453 * The boot-arg is interpreted as Megabytes,
454 * and takes precedence over the device tree.
455 * Setting the boot-arg to 0 disables task limits.
456 */
457 TUNABLE_DT_WRITEABLE(int, max_task_footprint_mb, "/defaults", "kern.max_task_pmem", "max_task_pmem", 0, TUNABLE_DT_NONE);
458
459 /* I/O Monitor Limits */
460 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
461 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
462
463 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
464 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
465
466 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
467 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
468 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
469 int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
470 static boolean_t global_update_logical_writes(int64_t, int64_t*);
471
472 #if DEBUG || DEVELOPMENT
473 static diagthreshold_check_return task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value);
474 #endif
475 #define TASK_MAX_THREAD_LIMIT 256
476
477 #if MACH_ASSERT
478 int pmap_ledgers_panic = 1;
479 int pmap_ledgers_panic_leeway = 3;
480 #endif /* MACH_ASSERT */
481
482 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
483
484 #if CONFIG_COREDUMP
485 int hwm_user_cores = 0; /* high watermark violations generate user core files */
486 #endif
487
488 #ifdef MACH_BSD
489 extern uint32_t proc_platform(const struct proc *);
490 extern uint32_t proc_sdk(struct proc *);
491 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
492 extern int proc_pid(struct proc *p);
493 extern int proc_selfpid(void);
494 extern struct proc *current_proc(void);
495 extern char *proc_name_address(struct proc *p);
496 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
497 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
498 extern void workq_proc_suspended(struct proc *p);
499 extern void workq_proc_resumed(struct proc *p);
500 extern struct proc *kernproc;
501
502 #if CONFIG_MEMORYSTATUS
503 extern void proc_memstat_skip(struct proc* p, boolean_t set);
504 extern void memorystatus_on_ledger_footprint_exceeded(int warning, bool memlimit_is_active, bool memlimit_is_fatal);
505 extern void memorystatus_log_exception(const int max_footprint_mb, bool memlimit_is_active, bool memlimit_is_fatal);
506 extern void memorystatus_log_diag_threshold_exception(const int diag_threshold_value);
507 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
508 extern uint64_t memorystatus_available_memory_internal(struct proc *p);
509
510 #if DEVELOPMENT || DEBUG
511 extern void memorystatus_abort_vm_map_fork(task_t);
512 #endif
513
514 #endif /* CONFIG_MEMORYSTATUS */
515
516 #endif /* MACH_BSD */
517
518 /* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
519 static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
520
521 /*
522 * Defaults for controllable EXC_GUARD behaviors
523 *
524 * Internal builds are fatal by default (except BRIDGE).
525 * Create an alternate set of defaults for special processes by name.
526 */
527 struct task_exc_guard_named_default {
528 char *name;
529 uint32_t behavior;
530 };
/*
 * Composite EXC_GUARD behavior presets, built from the per-category
 * mach-port ("MP") and virtual-memory ("VM") DELIVER/CORPSE/ONCE/FATAL bits.
 */
#define _TASK_EXC_GUARD_MP_CORPSE (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
#define _TASK_EXC_GUARD_MP_ONCE (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
#define _TASK_EXC_GUARD_MP_FATAL (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)

/*
 * Fix: the VM "corpse" preset must pair DELIVER with the VM CORPSE bit,
 * mirroring _TASK_EXC_GUARD_MP_CORPSE above. It previously ORed in
 * TASK_EXC_GUARD_VM_ONCE, so _TASK_EXC_GUARD_VM_CORPSE requested one-shot
 * delivery without requesting corpse generation.
 */
#define _TASK_EXC_GUARD_VM_CORPSE (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE)
#define _TASK_EXC_GUARD_VM_ONCE (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_VM_FATAL (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)

#define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
#define _TASK_EXC_GUARD_ALL_ONCE (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_ALL_FATAL (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
542
543 /* cannot turn off FATAL and DELIVER bit if set */
544 uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL | TASK_EXC_GUARD_VM_FATAL |
545 TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_VM_DELIVER;
546 /* cannot turn on ONCE bit if unset */
547 uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_VM_ONCE;
548
549 #if !defined(XNU_TARGET_OS_BRIDGE)
550
551 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
552 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
553 /*
554 * These "by-process-name" default overrides are intended to be a short-term fix to
555 * quickly get over races between changes introducing new EXC_GUARD raising behaviors
556 * in some process and a change in default behavior for same. We should ship with
557 * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
558 * exception behavior via task_set_exc_guard_behavior()).
559 *
560 * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
561 * task_exc_guard_default when transitioning this list between empty and
562 * non-empty.
563 */
564 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
565
566 #else /* !defined(XNU_TARGET_OS_BRIDGE) */
567
568 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
569 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
570 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
571
572 #endif /* !defined(XNU_TARGET_OS_BRIDGE) */
573
574 /* Forwards */
575
576 static void task_hold_locked(task_t task);
577 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
578 static void task_release_locked(task_t task);
579 extern task_t proc_get_task_raw(void *proc);
580 extern void task_ref_hold_proc_task_struct(task_t task);
581 extern void task_release_proc_task_struct(task_t task, proc_ro_t proc_ro);
582
583 static void task_synchronizer_destroy_all(task_t task);
584 static os_ref_count_t
585 task_add_turnstile_watchports_locked(
586 task_t task,
587 struct task_watchports *watchports,
588 struct task_watchport_elem **previous_elem_array,
589 ipc_port_t *portwatch_ports,
590 uint32_t portwatch_count);
591
592 static os_ref_count_t
593 task_remove_turnstile_watchports_locked(
594 task_t task,
595 struct task_watchports *watchports,
596 ipc_port_t *port_freelist);
597
598 static struct task_watchports *
599 task_watchports_alloc_init(
600 task_t task,
601 thread_t thread,
602 uint32_t count);
603
604 static void
605 task_watchports_deallocate(
606 struct task_watchports *watchports);
607
608 __attribute__((always_inline)) inline void
task_lock(task_t task)609 task_lock(task_t task)
610 {
611 lck_mtx_lock(&(task)->lock);
612 }
613
614 __attribute__((always_inline)) inline void
task_unlock(task_t task)615 task_unlock(task_t task)
616 {
617 lck_mtx_unlock(&(task)->lock);
618 }
619
/*
 * task_set_64bit:
 *
 * Switch the task to/from a 64-bit address space (is_64bit) and/or
 * 64-bit register state (is_64bit_data), under the task lock. When the
 * register-state width actually changes, every thread in the task has
 * its save-state flavor retargeted on x86_64/arm64.
 */
void
task_set_64bit(
	task_t task,
	boolean_t is_64bit,
	boolean_t is_64bit_data)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
	thread_t thread;
#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */

	task_lock(task);

	/*
	 * Switching to/from 64-bit address spaces
	 */
	if (is_64bit) {
		if (!task_has_64Bit_addr(task)) {
			task_set_64Bit_addr(task);
		}
	} else {
		if (task_has_64Bit_addr(task)) {
			task_clear_64Bit_addr(task);
		}
	}

	/*
	 * Switching to/from 64-bit register state.
	 */
	if (is_64bit_data) {
		if (task_has_64Bit_data(task)) {
			goto out; /* already 64-bit data: threads need no update */
		}

		task_set_64Bit_data(task);
	} else {
		if (!task_has_64Bit_data(task)) {
			goto out; /* already 32-bit data: threads need no update */
		}

		task_clear_64Bit_data(task);
	}

	/* FIXME: On x86, the thread save state flavor can diverge from the
	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
	 * state dichotomy. Since we can be pre-empted in this interval,
	 * certain routines may observe the thread as being in an inconsistent
	 * state with respect to its task's 64-bitness.
	 */

#if defined(__x86_64__) || defined(__arm64__)
	/* Data width changed: switch each thread's machine address mode. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		machine_thread_switch_addrmode(thread);
		thread_mtx_unlock(thread);
	}
#endif /* defined(__x86_64__) || defined(__arm64__) */

out:
	task_unlock(task);
}
680
681 bool
task_get_64bit_addr(task_t task)682 task_get_64bit_addr(task_t task)
683 {
684 return task_has_64Bit_addr(task);
685 }
686
687 bool
task_get_64bit_data(task_t task)688 task_get_64bit_data(task_t task)
689 {
690 return task_has_64Bit_data(task);
691 }
692
693 void
task_set_platform_binary(task_t task,boolean_t is_platform)694 task_set_platform_binary(
695 task_t task,
696 boolean_t is_platform)
697 {
698 if (is_platform) {
699 task_ro_flags_set(task, TFRO_PLATFORM);
700 } else {
701 task_ro_flags_clear(task, TFRO_PLATFORM);
702 }
703 }
704
705 #if XNU_TARGET_OS_OSX
706 #if DEVELOPMENT || DEBUG
707 SECURITY_READ_ONLY_LATE(bool) AMFI_bootarg_disable_mach_hardening = false;
708 #endif /* DEVELOPMENT || DEBUG */
709
/*
 * Opt this task out of mach hardening policies by setting the
 * read-only TFRO_MACH_HARDENING_OPT_OUT flag.
 */
void
task_disable_mach_hardening(task_t task)
{
	task_ro_flags_set(task, TFRO_MACH_HARDENING_OPT_OUT);
}
715
716 bool
task_opted_out_mach_hardening(task_t task)717 task_opted_out_mach_hardening(task_t task)
718 {
719 if (!task) {
720 return false;
721 }
722 return task_ro_flags_get(task) & TFRO_MACH_HARDENING_OPT_OUT;
723 }
724 #endif /* XNU_TARGET_OS_OSX */
725
726 /*
727 * Use the `task_is_hardened_binary` macro below
728 * when applying new security policies.
729 *
730 * Kernel security policies now generally apply to
731 * "hardened binaries" - which are platform binaries, and
732 * third party binaries who adopt hardened runtime on ios.
733 */
734 boolean_t
task_get_platform_binary(task_t task)735 task_get_platform_binary(task_t task)
736 {
737 return (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
738 }
739
740 static boolean_t
task_get_hardened_runtime(task_t task)741 task_get_hardened_runtime(task_t task)
742 {
743 return (task_ro_flags_get(task) & TFRO_HARDENED) != 0;
744 }
745
746 boolean_t
task_is_hardened_binary(task_t task)747 task_is_hardened_binary(task_t task)
748 {
749 return task_get_platform_binary(task) ||
750 task_get_hardened_runtime(task);
751 }
752
753 void
task_set_hardened_runtime(task_t task,bool is_hardened)754 task_set_hardened_runtime(
755 task_t task,
756 bool is_hardened)
757 {
758 if (is_hardened) {
759 task_ro_flags_set(task, TFRO_HARDENED);
760 } else {
761 task_ro_flags_clear(task, TFRO_HARDENED);
762 }
763 }
764
765 boolean_t
task_is_a_corpse(task_t task)766 task_is_a_corpse(task_t task)
767 {
768 return (task_ro_flags_get(task) & TFRO_CORPSE) != 0;
769 }
770
/*
 * Return the task's ipc_active flag.
 */
boolean_t
task_is_ipc_active(task_t task)
{
	return task->ipc_active;
}
776
777 void
task_set_corpse(task_t task)778 task_set_corpse(task_t task)
779 {
780 return task_ro_flags_set(task, TFRO_CORPSE);
781 }
782
/*
 * Apply immovable/pinned policy to the task's ports; delegates to
 * ipc_task_set_immovable_pinned().
 */
void
task_set_immovable_pinned(task_t task)
{
	ipc_task_set_immovable_pinned(task);
}
788
789 /*
790 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
791 * Returns "false" if flag is already set, and "true" in other cases.
792 */
793 bool
task_set_ca_client_wi(task_t task,boolean_t set_or_clear)794 task_set_ca_client_wi(
795 task_t task,
796 boolean_t set_or_clear)
797 {
798 bool ret = true;
799 task_lock(task);
800 if (set_or_clear) {
801 /* Tasks can have only one CA_CLIENT work interval */
802 if (task->t_flags & TF_CA_CLIENT_WI) {
803 ret = false;
804 } else {
805 task->t_flags |= TF_CA_CLIENT_WI;
806 }
807 } else {
808 task->t_flags &= ~TF_CA_CLIENT_WI;
809 }
810 task_unlock(task);
811 return ret;
812 }
813
814 /*
815 * task_set_dyld_info() is called at most three times.
816 * 1) at task struct creation to set addr/size to zero.
817 * 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
818 * 3) is from dyld itself to update location of all_image_info
819 * For security any calls after that are ignored. The TF_DYLD_ALL_IMAGE_SET bit is used to determine state.
820 */
821 kern_return_t
task_set_dyld_info(task_t task,mach_vm_address_t addr,mach_vm_size_t size)822 task_set_dyld_info(
823 task_t task,
824 mach_vm_address_t addr,
825 mach_vm_size_t size)
826 {
827 mach_vm_address_t end;
828 if (os_add_overflow(addr, size, &end)) {
829 return KERN_FAILURE;
830 }
831
832 task_lock(task);
833 /* don't accept updates if all_image_info_addr is final */
834 if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == 0) {
835 bool inputNonZero = ((addr != 0) || (size != 0));
836 bool currentNonZero = ((task->all_image_info_addr != 0) || (task->all_image_info_size != 0));
837 task->all_image_info_addr = addr;
838 task->all_image_info_size = size;
839 /* can only change from a non-zero value to another non-zero once */
840 if (inputNonZero && currentNonZero) {
841 task->t_flags |= TF_DYLD_ALL_IMAGE_FINAL;
842 }
843 task_unlock(task);
844 return KERN_SUCCESS;
845 } else {
846 task_unlock(task);
847 return KERN_FAILURE;
848 }
849 }
850
/*
 * Return the task's donates_own_pages flag.
 */
bool
task_donates_own_pages(
	task_t task)
{
	return task->donates_own_pages;
}
857
/*
 * Record the VM address of the task's mach-o header, under the task lock.
 */
void
task_set_mach_header_address(
	task_t task,
	mach_vm_address_t addr)
{
	task_lock(task);
	task->mach_header_vm_address = addr;
	task_unlock(task);
}
867
868 void
task_bank_reset(__unused task_t task)869 task_bank_reset(__unused task_t task)
870 {
871 if (task->bank_context != NULL) {
872 bank_task_destroy(task);
873 }
874 }
875
876 /*
877 * NOTE: This should only be called when the P_LINTRANSIT
878 * flag is set (the proc_trans lock is held) on the
879 * proc associated with the task.
880 */
881 void
task_bank_init(__unused task_t task)882 task_bank_init(__unused task_t task)
883 {
884 if (task->bank_context != NULL) {
885 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
886 }
887 bank_task_initialize(task);
888 }
889
890 void
task_set_did_exec_flag(task_t task)891 task_set_did_exec_flag(task_t task)
892 {
893 task->t_procflags |= TPF_DID_EXEC;
894 }
895
896 void
task_clear_exec_copy_flag(task_t task)897 task_clear_exec_copy_flag(task_t task)
898 {
899 task->t_procflags &= ~TPF_EXEC_COPY;
900 }
901
902 event_t
task_get_return_wait_event(task_t task)903 task_get_return_wait_event(task_t task)
904 {
905 return (event_t)&task->returnwait_inheritor;
906 }
907
/*
 * task_clear_return_wait:
 *
 * Wake threads blocked in task_wait_to_return().  `flags` selects which
 * phase to clear: the initial wait (plain wakeup) and/or the final wait,
 * which requires tearing down the turnstile-based wait under the task's
 * ipc-space write lock.
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		/* ipc-space write lock serializes t_returnwaitflags updates */
		is_write_lock(task->itk_space);

		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (flags & TCRW_CLEAR_EXEC_COMPLETE) {
			task->t_returnwaitflags &= ~TRW_LEXEC_COMPLETE;
		}

		/* only pay for turnstile teardown if someone is actually waiting */
		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
			    TURNSTILE_ULOCK);

			/* wake all waiters and hand back priority inheritance */
			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
942
/*
 * task_wait_to_return:
 *
 * Block the current thread until its task's return-wait is cleared by
 * task_clear_return_wait(), then bounce to userspace via
 * thread_bootstrap_return().  Does not return to the caller.
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();
	uint8_t returnwaitflags;

	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
		    TURNSTILE_ULOCK);

		do {
			/* flag ourselves as a waiter so the clearer knows to wake us */
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			/* push our priority onto the inheritor thread via the turnstile */
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			/* drop the ipc-space lock before actually blocking */
			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			/* re-take the lock to re-check the flag (guards spurious wakeups) */
			is_write_lock(task->itk_space);
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
	}

	/* snapshot the flags before dropping the lock */
	returnwaitflags = task->t_returnwaitflags;
	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process
	 * to execute any code, make sure its credentials are cached,
	 * and notify any interested parties.
	 */
	extern void current_cached_proc_cred_update(void);

	current_cached_proc_cred_update();
	if (returnwaitflags & TRW_LEXEC_COMPLETE) {
		mac_proc_notify_exec_complete(current_proc());
	}
#endif

	thread_bootstrap_return();
}
997
998 boolean_t
task_is_exec_copy(task_t task)999 task_is_exec_copy(task_t task)
1000 {
1001 return task_is_exec_copy_internal(task);
1002 }
1003
1004 boolean_t
task_did_exec(task_t task)1005 task_did_exec(task_t task)
1006 {
1007 return task_did_exec_internal(task);
1008 }
1009
1010 boolean_t
task_is_active(task_t task)1011 task_is_active(task_t task)
1012 {
1013 return task->active;
1014 }
1015
1016 boolean_t
task_is_halting(task_t task)1017 task_is_halting(task_t task)
1018 {
1019 return task->halting;
1020 }
1021
/*
 * task_init:
 *
 * Boot-time initialization of the task subsystem: configures the
 * per-task physical-footprint limit and warning level, parses the
 * monitor-related boot-args, sets up task ledgers (unless coalitions
 * already did), creates the proc_task zone, and finally constructs
 * the kernel task itself.
 */
void
task_init(void)
{
	if (max_task_footprint_mb != 0) {
#if CONFIG_MEMORYSTATUS
		/* clamp the boot-arg to a 50 MB floor */
		if (max_task_footprint_mb < 50) {
			printf("Warning: max_task_pmem %d below minimum.\n",
			    max_task_footprint_mb);
			max_task_footprint_mb = 50;
		}
		printf("Limiting task physical memory footprint to %d MB\n",
		    max_task_footprint_mb);

		max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024;         // Convert MB to bytes

		/*
		 * Configure the per-task memory limit warning level.
		 * This is computed as a percentage.
		 */
		max_task_footprint_warning_level = 0;

		if (max_mem < 0x40000000) {
			/*
			 * On devices with < 1GB of memory:
			 * -- set warnings to 50MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 50) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
			}
		} else {
			/*
			 * On devices with >= 1GB of memory:
			 * -- set warnings to 100MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 100) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
			}
		}

		/*
		 * Never allow warning level to land below the default.
		 */
		if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
			max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
		}

		printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);

#else
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
#endif /* CONFIG_MEMORYSTATUS */
	}

#if DEVELOPMENT || DEBUG
	PE_parse_boot_argn("task_exc_guard_default",
	    &task_exc_guard_default,
	    sizeof(task_exc_guard_default));
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_COREDUMP
	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
	    sizeof(hwm_user_cores))) {
		hwm_user_cores = 0;
	}
#endif

	proc_init_cpumon_params();

	/* wakeups / IO monitor knobs fall back to defaults when no boot-arg given */
	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
	    sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
	}

	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
		task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
		task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
		io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
	}

	/*
	 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
	 * sets up the ledgers for the default coalition. If we don't have coalitions,
	 * then we have to call it now.
	 */
#if CONFIG_COALITIONS
	assert(task_ledger_template);
#else /* CONFIG_COALITIONS */
	init_task_ledgers();
#endif /* CONFIG_COALITIONS */

	task_ref_init();
	task_zone_init();

#ifdef __LP64__
	boolean_t is_64bit = TRUE;
#else
	boolean_t is_64bit = FALSE;
#endif

	/* kernproc and kernel_task share a single proc_task_zone element */
	kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK | Z_ZERO);
	kernel_task = proc_get_task_raw(kernproc);

	/*
	 * Create the kernel task as the first task.
	 */
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, is_64bit,
	    is_64bit, TF_NONE, TF_NONE, TPF_NONE, TWF_NONE, kernel_task) != KERN_SUCCESS) {
		panic("task_init");
	}

	ipc_task_enable(kernel_task);

#if defined(HAS_APPLE_PAC)
	kernel_task->rop_pid = ml_default_rop_pid();
	kernel_task->jop_pid = ml_default_jop_pid();
	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
	ml_task_set_disable_user_jop(kernel_task, FALSE);
#endif

	/* swap the freshly created map for the real kernel_map */
	vm_map_deallocate(kernel_task->map);
	kernel_task->map = kernel_map;
}
1159
/*
 * task_zone_init:
 *
 * Create the combined proc+task zone.  A proc and its task are
 * allocated as a single zone element, so each struct size is rounded
 * up to the other's alignment before the two sizes are summed.
 */
static inline void
task_zone_init(void)
{
	proc_struct_size = roundup(proc_struct_size, task_alignment);
	task_struct_size = roundup(sizeof(struct task), proc_alignment);
	proc_and_task_size = proc_struct_size + task_struct_size;

	proc_task_zone = zone_create_ext("proc_task", proc_and_task_size,
	    ZC_ZFREE_CLEARMEM | ZC_SEQUESTER, ZONE_ID_PROC_TASK, NULL); /* sequester is needed for proc_rele() */
}
1170
1171 /*
1172 * Task ledgers
1173 * ------------
1174 *
1175 * phys_footprint
1176 * Physical footprint: This is the sum of:
1177 * + (internal - alternate_accounting)
1178 * + (internal_compressed - alternate_accounting_compressed)
1179 * + iokit_mapped
1180 * + purgeable_nonvolatile
1181 * + purgeable_nonvolatile_compressed
1182 * + page_table
1183 *
1184 * internal
1185 * The task's anonymous memory, which on iOS is always resident.
1186 *
1187 * internal_compressed
1188 * Amount of this task's internal memory which is held by the compressor.
1189 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1190 * and could be either decompressed back into memory, or paged out to storage, depending
1191 * on our implementation.
1192 *
 * iokit_mapped
 * IOKit mappings: The total size of all IOKit mappings in this task [regardless of
 * clean/dirty or internal/external state].
1196 *
1197 * alternate_accounting
1198 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1199 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1200 * double counting.
1201 *
1202 * pages_grabbed
1203 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1204 * which track UPL, IOPL and Kernel page grabs.
1205 */
/*
 * init_task_ledgers:
 *
 * Build the per-task ledger template: register every ledger entry,
 * sanity-check that all registrations succeeded, then configure
 * credit-only tracking, maximum tracking, negative-balance panics
 * (MACH_ASSERT), and limit-exceeded callbacks.  Must run exactly once,
 * before the kernel task is created.
 */
void
init_task_ledgers(void)
{
	ledger_template_t t;

	assert(task_ledger_template == NULL);
	assert(kernel_task == TASK_NULL);

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_ledgers_panic",
	    &pmap_ledgers_panic,
	    sizeof(pmap_ledgers_panic));
	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
	    &pmap_ledgers_panic_leeway,
	    sizeof(pmap_ledgers_panic_leeway));
#endif /* MACH_ASSERT */

	if ((t = ledger_template_create("Per-task ledger")) == NULL) {
		panic("couldn't create task ledger template");
	}

	/* CPU / memory accounting entries */
	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
	    "physmem", "bytes");
	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
	    "bytes");
	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
	    "bytes");
	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
	    "bytes");
	task_ledgers.conclave_mem = ledger_entry_add_with_flags(t, "conclave_mem", "physmem", "count",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE | LEDGER_ENTRY_ALLOW_DEBIT);
	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
	    "bytes");
	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(t, "iokit_mapped", "mappings",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(t, "alternate_accounting", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(t, "alternate_accounting_compressed", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.page_table = ledger_entry_add_with_flags(t, "page_table", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
	    "bytes");
	task_ledgers.reusable = ledger_entry_add(t, "reusable", "physmem", "bytes");
	task_ledgers.external = ledger_entry_add(t, "external", "physmem", "bytes");
	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(t, "purgeable_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(t, "purgeable_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(t, "purgeable_volatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(t, "purgeable_nonvolatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#if DEBUG || DEVELOPMENT
	/* page-grab counters (see block comment above): total plus kern/iopl/upl breakdown */
	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#endif
	/* tagged / network / media / graphics / neural memory classes, footprint and compressed variants */
	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(t, "tagged_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(t, "tagged_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(t, "tagged_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(t, "tagged_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile = ledger_entry_add_with_flags(t, "network_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(t, "network_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(t, "network_volatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(t, "network_nonvolatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(t, "media_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint = ledger_entry_add_with_flags(t, "media_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(t, "media_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(t, "media_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(t, "graphics_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(t, "graphics_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(t, "graphics_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(t, "graphics_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(t, "neural_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint = ledger_entry_add_with_flags(t, "neural_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(t, "neural_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(t, "neural_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

#if CONFIG_FREEZE
	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
#endif /* CONFIG_FREEZE */

	/* power / wakeup accounting */
	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");

#if CONFIG_SCHED_SFI
	sfi_class_id_t class_id, ledger_alias;
	/* -1 marks "not yet registered" for each SFI wait-time entry */
	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		task_ledgers.sfi_wait_times[class_id] = -1;
	}

	/* don't account for UNSPECIFIED */
	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
			/* Check to see if alias has been registered yet */
			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
			} else {
				/* Otherwise, initialize it first */
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
			}
		} else {
			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
		}

		if (task_ledgers.sfi_wait_times[class_id] < 0) {
			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
		}
	}

	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
#endif /* CONFIG_SCHED_SFI */

	/* billing, I/O and energy accounting */
	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
	task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
#if CONFIG_PHYS_WRITE_ACCT
	task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
#endif /* CONFIG_PHYS_WRITE_ACCT */
	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");

#if CONFIG_MEMORYSTATUS
	task_ledgers.memorystatus_dirty_time = ledger_entry_add(t, "memorystatus_dirty_time", "physmem", "ns");
#endif /* CONFIG_MEMORYSTATUS */

	task_ledgers.swapins = ledger_entry_add_with_flags(t, "swapins", "physmem", "bytes",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

	/* a negative index means the ledger entry failed to register */
	if ((task_ledgers.cpu_time < 0) ||
	    (task_ledgers.tkm_private < 0) ||
	    (task_ledgers.tkm_shared < 0) ||
	    (task_ledgers.phys_mem < 0) ||
	    (task_ledgers.wired_mem < 0) ||
	    (task_ledgers.conclave_mem < 0) ||
	    (task_ledgers.internal < 0) ||
	    (task_ledgers.external < 0) ||
	    (task_ledgers.reusable < 0) ||
	    (task_ledgers.iokit_mapped < 0) ||
	    (task_ledgers.alternate_accounting < 0) ||
	    (task_ledgers.alternate_accounting_compressed < 0) ||
	    (task_ledgers.page_table < 0) ||
	    (task_ledgers.phys_footprint < 0) ||
	    (task_ledgers.internal_compressed < 0) ||
	    (task_ledgers.purgeable_volatile < 0) ||
	    (task_ledgers.purgeable_nonvolatile < 0) ||
	    (task_ledgers.purgeable_volatile_compressed < 0) ||
	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
	    (task_ledgers.tagged_nofootprint < 0) ||
	    (task_ledgers.tagged_footprint < 0) ||
	    (task_ledgers.tagged_nofootprint_compressed < 0) ||
	    (task_ledgers.tagged_footprint_compressed < 0) ||
#if CONFIG_FREEZE
	    (task_ledgers.frozen_to_swap < 0) ||
#endif /* CONFIG_FREEZE */
	    (task_ledgers.network_volatile < 0) ||
	    (task_ledgers.network_nonvolatile < 0) ||
	    (task_ledgers.network_volatile_compressed < 0) ||
	    (task_ledgers.network_nonvolatile_compressed < 0) ||
	    (task_ledgers.media_nofootprint < 0) ||
	    (task_ledgers.media_footprint < 0) ||
	    (task_ledgers.media_nofootprint_compressed < 0) ||
	    (task_ledgers.media_footprint_compressed < 0) ||
	    (task_ledgers.graphics_nofootprint < 0) ||
	    (task_ledgers.graphics_footprint < 0) ||
	    (task_ledgers.graphics_nofootprint_compressed < 0) ||
	    (task_ledgers.graphics_footprint_compressed < 0) ||
	    (task_ledgers.neural_nofootprint < 0) ||
	    (task_ledgers.neural_footprint < 0) ||
	    (task_ledgers.neural_nofootprint_compressed < 0) ||
	    (task_ledgers.neural_footprint_compressed < 0) ||
	    (task_ledgers.platform_idle_wakeups < 0) ||
	    (task_ledgers.interrupt_wakeups < 0) ||
	    (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
	    (task_ledgers.physical_writes < 0) ||
	    (task_ledgers.logical_writes < 0) ||
	    (task_ledgers.logical_writes_to_external < 0) ||
#if CONFIG_PHYS_WRITE_ACCT
	    (task_ledgers.fs_metadata_writes < 0) ||
#endif /* CONFIG_PHYS_WRITE_ACCT */
#if CONFIG_MEMORYSTATUS
	    (task_ledgers.memorystatus_dirty_time < 0) ||
#endif /* CONFIG_MEMORYSTATUS */
	    (task_ledgers.energy_billed_to_me < 0) ||
	    (task_ledgers.energy_billed_to_others < 0) ||
	    (task_ledgers.swapins < 0)
	    ) {
		panic("couldn't create entries for task ledger template");
	}

	/* these entries only ever accumulate credit */
	ledger_track_credit_only(t, task_ledgers.phys_footprint);
	ledger_track_credit_only(t, task_ledgers.internal);
	ledger_track_credit_only(t, task_ledgers.external);
	ledger_track_credit_only(t, task_ledgers.reusable);

	/* track high-water marks over a 60-unit interval */
	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
	ledger_track_maximum(t, task_ledgers.phys_mem, 60);
	ledger_track_maximum(t, task_ledgers.internal, 60);
	ledger_track_maximum(t, task_ledgers.internal_compressed, 60);
	ledger_track_maximum(t, task_ledgers.reusable, 60);
	ledger_track_maximum(t, task_ledgers.external, 60);
#if MACH_ASSERT
	if (pmap_ledgers_panic) {
		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
		ledger_panic_on_negative(t, task_ledgers.conclave_mem);
		ledger_panic_on_negative(t, task_ledgers.page_table);
		ledger_panic_on_negative(t, task_ledgers.internal);
		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
#if CONFIG_PHYS_WRITE_ACCT
		ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
#endif /* CONFIG_PHYS_WRITE_ACCT */

		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_volatile);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.media_footprint);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
	}
#endif /* MACH_ASSERT */

	/* limit-exceeded callbacks */
#if CONFIG_MEMORYSTATUS
	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
#endif /* CONFIG_MEMORYSTATUS */

	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
	    task_wakeups_rate_exceeded, NULL, NULL);
	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);

#if CONFIG_SPTM || !XNU_MONITOR
	ledger_template_complete(t);
#else /* CONFIG_SPTM || !XNU_MONITOR */
	ledger_template_complete_secure_alloc(t);
#endif /* CONFIG_SPTM || !XNU_MONITOR */
	task_ledger_template = t;
}
1468
1469 /* Create a task, but leave the task ports disabled */
1470 kern_return_t
task_create_internal(task_t parent_task,proc_ro_t proc_ro,coalition_t * parent_coalitions __unused,boolean_t inherit_memory,boolean_t is_64bit,boolean_t is_64bit_data,uint32_t t_flags,uint32_t t_flags_ro,uint32_t t_procflags,uint8_t t_returnwaitflags,task_t child_task)1471 task_create_internal(
1472 task_t parent_task, /* Null-able */
1473 proc_ro_t proc_ro,
1474 coalition_t *parent_coalitions __unused,
1475 boolean_t inherit_memory,
1476 boolean_t is_64bit,
1477 boolean_t is_64bit_data,
1478 uint32_t t_flags,
1479 uint32_t t_flags_ro,
1480 uint32_t t_procflags,
1481 uint8_t t_returnwaitflags,
1482 task_t child_task)
1483 {
1484 task_t new_task;
1485 vm_shared_region_t shared_region;
1486 ledger_t ledger = NULL;
1487 struct task_ro_data task_ro_data = {};
1488 uint32_t parent_t_flags_ro = 0;
1489
1490 new_task = child_task;
1491
1492 if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1493 return KERN_RESOURCE_SHORTAGE;
1494 }
1495
1496 /* allocate with active entries */
1497 assert(task_ledger_template != NULL);
1498 ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1499 if (ledger == NULL) {
1500 task_ref_count_fini(new_task);
1501 return KERN_RESOURCE_SHORTAGE;
1502 }
1503
1504 counter_alloc(&(new_task->faults));
1505
1506 #if defined(HAS_APPLE_PAC)
1507 const uint8_t disable_user_jop = inherit_memory ? parent_task->disable_user_jop : FALSE;
1508 ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1509 ml_task_set_jop_pid(new_task, parent_task, inherit_memory, disable_user_jop);
1510 ml_task_set_disable_user_jop(new_task, disable_user_jop);
1511 #endif
1512
1513
1514 new_task->ledger = ledger;
1515
1516 /* if inherit_memory is true, parent_task MUST not be NULL */
1517 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1518 #if CONFIG_DEFERRED_RECLAIM
1519 if (parent_task->deferred_reclamation_metadata) {
1520 /*
1521 * Prevent concurrent reclaims while we're forking the parent_task's map,
1522 * so that the child's map is in sync with the forked reclamation
1523 * metadata.
1524 */
1525 vm_deferred_reclamation_buffer_lock(
1526 parent_task->deferred_reclamation_metadata);
1527 }
1528 #endif /* CONFIG_DEFERRED_RECLAIM */
1529 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1530 #if CONFIG_DEFERRED_RECLAIM
1531 if (new_task->map != NULL &&
1532 parent_task->deferred_reclamation_metadata) {
1533 new_task->deferred_reclamation_metadata =
1534 vm_deferred_reclamation_buffer_fork(new_task,
1535 parent_task->deferred_reclamation_metadata);
1536 }
1537 #endif /* CONFIG_DEFERRED_RECLAIM */
1538 } else {
1539 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1540 pmap_t pmap = pmap_create_options(ledger, 0, pmap_flags);
1541 vm_map_t new_map;
1542
1543 if (pmap == NULL) {
1544 counter_free(&new_task->faults);
1545 ledger_dereference(ledger);
1546 task_ref_count_fini(new_task);
1547 return KERN_RESOURCE_SHORTAGE;
1548 }
1549 new_map = vm_map_create_options(pmap,
1550 (vm_map_offset_t)(VM_MIN_ADDRESS),
1551 (vm_map_offset_t)(VM_MAX_ADDRESS),
1552 VM_MAP_CREATE_PAGEABLE);
1553 if (parent_task) {
1554 vm_map_inherit_limits(new_map, parent_task->map);
1555 }
1556 new_task->map = new_map;
1557 }
1558
1559 if (new_task->map == NULL) {
1560 counter_free(&new_task->faults);
1561 ledger_dereference(ledger);
1562 task_ref_count_fini(new_task);
1563 return KERN_RESOURCE_SHORTAGE;
1564 }
1565
1566 #if defined(CONFIG_SCHED_MULTIQ)
1567 new_task->sched_group = sched_group_create();
1568 #endif
1569
1570 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1571 queue_init(&new_task->threads);
1572 new_task->suspend_count = 0;
1573 new_task->thread_count = 0;
1574 new_task->active_thread_count = 0;
1575 new_task->user_stop_count = 0;
1576 new_task->legacy_stop_count = 0;
1577 new_task->active = TRUE;
1578 new_task->halting = FALSE;
1579 new_task->priv_flags = 0;
1580 new_task->t_flags = t_flags;
1581 task_ro_data.t_flags_ro = t_flags_ro;
1582 new_task->t_procflags = t_procflags;
1583 new_task->t_returnwaitflags = t_returnwaitflags;
1584 new_task->returnwait_inheritor = current_thread();
1585 new_task->importance = 0;
1586 new_task->crashed_thread_id = 0;
1587 new_task->watchports = NULL;
1588 new_task->t_rr_ranges = NULL;
1589
1590 new_task->bank_context = NULL;
1591
1592 if (parent_task) {
1593 parent_t_flags_ro = task_ro_flags_get(parent_task);
1594 }
1595
1596 if (parent_task && inherit_memory) {
1597 #if __has_feature(ptrauth_calls)
1598 /* Inherit the pac exception flags from parent if in fork */
1599 task_ro_data.t_flags_ro |= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE |
1600 TFRO_PAC_EXC_FATAL));
1601 #endif /* __has_feature(ptrauth_calls) */
1602 /* Inherit the hardened binary flags from parent if in fork */
1603 task_ro_data.t_flags_ro |= parent_t_flags_ro & (TFRO_HARDENED | TFRO_PLATFORM | TFRO_JIT_EXC_FATAL);
1604 #if XNU_TARGET_OS_OSX
1605 task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_MACH_HARDENING_OPT_OUT;
1606 #endif /* XNU_TARGET_OS_OSX */
1607 }
1608
1609 #ifdef MACH_BSD
1610 new_task->corpse_info = NULL;
1611 #endif /* MACH_BSD */
1612
1613 /* kern_task not created by this function has unique id 0, start with 1 here. */
1614 task_set_uniqueid(new_task);
1615
1616 #if CONFIG_MACF
1617 set_task_crash_label(new_task, NULL);
1618
1619 task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1620 task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1621 #endif
1622
1623 #if CONFIG_MEMORYSTATUS
1624 if (max_task_footprint != 0) {
1625 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1626 }
1627 #endif /* CONFIG_MEMORYSTATUS */
1628
1629 if (task_wakeups_monitor_rate != 0) {
1630 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1631 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1632 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1633 }
1634
1635 #if CONFIG_IO_ACCOUNTING
1636 uint32_t flags = IOMON_ENABLE;
1637 task_io_monitor_ctl(new_task, &flags);
1638 #endif /* CONFIG_IO_ACCOUNTING */
1639
1640 machine_task_init(new_task, parent_task, inherit_memory);
1641
1642 new_task->task_debug = NULL;
1643
1644 #if DEVELOPMENT || DEBUG
1645 new_task->task_unnested = FALSE;
1646 new_task->task_disconnected_count = 0;
1647 #endif
1648 queue_init(&new_task->semaphore_list);
1649 new_task->semaphores_owned = 0;
1650
1651 new_task->vtimers = 0;
1652
1653 new_task->shared_region = NULL;
1654
1655 new_task->affinity_space = NULL;
1656
1657 #if CONFIG_CPU_COUNTERS
1658 new_task->t_kpc = 0;
1659 #endif /* CONFIG_CPU_COUNTERS */
1660
1661 new_task->pidsuspended = FALSE;
1662 new_task->frozen = FALSE;
1663 new_task->changing_freeze_state = FALSE;
1664 new_task->rusage_cpu_flags = 0;
1665 new_task->rusage_cpu_percentage = 0;
1666 new_task->rusage_cpu_interval = 0;
1667 new_task->rusage_cpu_deadline = 0;
1668 new_task->rusage_cpu_callt = NULL;
1669 #if MACH_ASSERT
1670 new_task->suspends_outstanding = 0;
1671 #endif
1672 recount_task_init(&new_task->tk_recount);
1673
1674 #if HYPERVISOR
1675 new_task->hv_task_target = NULL;
1676 #endif /* HYPERVISOR */
1677
1678 #if CONFIG_TASKWATCH
1679 queue_init(&new_task->task_watchers);
1680 new_task->num_taskwatchers = 0;
1681 new_task->watchapplying = 0;
1682 #endif /* CONFIG_TASKWATCH */
1683
1684 new_task->mem_notify_reserved = 0;
1685 new_task->memlimit_attrs_reserved = 0;
1686
1687 new_task->requested_policy = default_task_requested_policy;
1688 new_task->effective_policy = default_task_effective_policy;
1689
1690 new_task->task_shared_region_slide = -1;
1691
1692 if (parent_task != NULL) {
1693 task_ro_data.task_tokens.sec_token = *task_get_sec_token(parent_task);
1694 task_ro_data.task_tokens.audit_token = *task_get_audit_token(parent_task);
1695
1696 /* only inherit the option bits, no effect until task_set_immovable_pinned() */
1697 task_ro_data.task_control_port_options = task_get_control_port_options(parent_task);
1698
1699 task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_FILTER_MSG;
1700 #if CONFIG_MACF
1701 if (!(t_flags & TF_CORPSE_FORK)) {
1702 task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(parent_task);
1703 task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(parent_task);
1704 }
1705 #endif
1706 } else {
1707 task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1708 task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1709
1710 task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1711 }
1712
1713 /* must set before task_importance_init_from_parent: */
1714 if (proc_ro != NULL) {
1715 new_task->bsd_info_ro = proc_ro_ref_task(proc_ro, new_task, &task_ro_data);
1716 } else {
1717 new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, new_task, &task_ro_data);
1718 }
1719
1720 ipc_task_init(new_task, parent_task);
1721
1722 task_importance_init_from_parent(new_task, parent_task);
1723
1724 new_task->corpse_vmobject_list = NULL;
1725
1726 if (parent_task != TASK_NULL) {
1727 /* inherit the parent's shared region */
1728 shared_region = vm_shared_region_get(parent_task);
1729 if (shared_region != NULL) {
1730 vm_shared_region_set(new_task, shared_region);
1731 }
1732
1733 #if __has_feature(ptrauth_calls)
1734 /* use parent's shared_region_id */
1735 char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1736 if (shared_region_id != NULL) {
1737 shared_region_key_alloc(shared_region_id, FALSE, 0); /* get a reference */
1738 }
1739 task_set_shared_region_id(new_task, shared_region_id);
1740 #endif /* __has_feature(ptrauth_calls) */
1741
1742 if (task_has_64Bit_addr(parent_task)) {
1743 task_set_64Bit_addr(new_task);
1744 }
1745
1746 if (task_has_64Bit_data(parent_task)) {
1747 task_set_64Bit_data(new_task);
1748 }
1749
1750 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1751 new_task->all_image_info_size = parent_task->all_image_info_size;
1752 new_task->mach_header_vm_address = 0;
1753
1754 if (inherit_memory && parent_task->affinity_space) {
1755 task_affinity_create(parent_task, new_task);
1756 }
1757
1758 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1759
1760 new_task->task_exc_guard = parent_task->task_exc_guard;
1761 if (parent_task->t_flags & TF_NO_SMT) {
1762 new_task->t_flags |= TF_NO_SMT;
1763 }
1764
1765 if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1766 new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1767 }
1768
1769 if (parent_task->t_flags & TF_TECS) {
1770 new_task->t_flags |= TF_TECS;
1771 }
1772
1773 #if defined(__x86_64__)
1774 if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1775 new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1776 }
1777 #endif
1778
1779 new_task->priority = BASEPRI_DEFAULT;
1780 new_task->max_priority = MAXPRI_USER;
1781
1782 task_policy_create(new_task, parent_task);
1783 } else {
1784 #ifdef __LP64__
1785 if (is_64bit) {
1786 task_set_64Bit_addr(new_task);
1787 }
1788 #endif
1789
1790 if (is_64bit_data) {
1791 task_set_64Bit_data(new_task);
1792 }
1793
1794 new_task->all_image_info_addr = (mach_vm_address_t)0;
1795 new_task->all_image_info_size = (mach_vm_size_t)0;
1796
1797 new_task->pset_hint = PROCESSOR_SET_NULL;
1798
1799 new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1800
1801 if (new_task == kernel_task) {
1802 new_task->priority = BASEPRI_KERNEL;
1803 new_task->max_priority = MAXPRI_KERNEL;
1804 } else {
1805 new_task->priority = BASEPRI_DEFAULT;
1806 new_task->max_priority = MAXPRI_USER;
1807 }
1808 }
1809
1810 bzero(new_task->coalition, sizeof(new_task->coalition));
1811 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1812 queue_chain_init(new_task->task_coalition[i]);
1813 }
1814
1815 /* Allocate I/O Statistics */
1816 new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1817 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1818
1819 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1820 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1821
1822 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1823
1824 counter_alloc(&(new_task->pageins));
1825 counter_alloc(&(new_task->cow_faults));
1826 counter_alloc(&(new_task->messages_sent));
1827 counter_alloc(&(new_task->messages_received));
1828
1829 /* Copy resource acc. info from Parent for Corpe Forked task. */
1830 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1831 task_rollup_accounting_info(new_task, parent_task);
1832 task_store_owned_vmobject_info(new_task, parent_task);
1833 } else {
1834 /* Initialize to zero for standard fork/spawn case */
1835 new_task->total_runnable_time = 0;
1836 new_task->syscalls_mach = 0;
1837 new_task->syscalls_unix = 0;
1838 new_task->c_switch = 0;
1839 new_task->p_switch = 0;
1840 new_task->ps_switch = 0;
1841 new_task->decompressions = 0;
1842 new_task->low_mem_notified_warn = 0;
1843 new_task->low_mem_notified_critical = 0;
1844 new_task->purged_memory_warn = 0;
1845 new_task->purged_memory_critical = 0;
1846 new_task->low_mem_privileged_listener = 0;
1847 new_task->memlimit_is_active = 0;
1848 new_task->memlimit_is_fatal = 0;
1849 new_task->memlimit_active_exc_resource = 0;
1850 new_task->memlimit_inactive_exc_resource = 0;
1851 new_task->task_timer_wakeups_bin_1 = 0;
1852 new_task->task_timer_wakeups_bin_2 = 0;
1853 new_task->task_gpu_ns = 0;
1854 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1855 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1856 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1857 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1858 new_task->task_writes_counters_external.task_immediate_writes = 0;
1859 new_task->task_writes_counters_external.task_deferred_writes = 0;
1860 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1861 new_task->task_writes_counters_external.task_metadata_writes = 0;
1862 #if CONFIG_PHYS_WRITE_ACCT
1863 new_task->task_fs_metadata_writes = 0;
1864 #endif /* CONFIG_PHYS_WRITE_ACCT */
1865 }
1866
1867
1868 new_task->donates_own_pages = FALSE;
1869 #if CONFIG_COALITIONS
1870 if (!(t_flags & TF_CORPSE_FORK)) {
1871 /* TODO: there is no graceful failure path here... */
1872 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1873 coalitions_adopt_task(parent_coalitions, new_task);
1874 if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1875 new_task->donates_own_pages = coalition_is_swappable(parent_coalitions[COALITION_TYPE_JETSAM]);
1876 }
1877 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1878 /*
1879 * all tasks at least have a resource coalition, so
1880 * if the parent has one then inherit all coalitions
1881 * the parent is a part of
1882 */
1883 coalitions_adopt_task(parent_task->coalition, new_task);
1884 if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1885 new_task->donates_own_pages = coalition_is_swappable(parent_task->coalition[COALITION_TYPE_JETSAM]);
1886 }
1887 } else {
1888 /* TODO: assert that new_task will be PID 1 (launchd) */
1889 coalitions_adopt_init_task(new_task);
1890 }
1891 /*
1892 * on exec, we need to transfer the coalition roles from the
1893 * parent task to the exec copy task.
1894 */
1895 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1896 int coal_roles[COALITION_NUM_TYPES];
1897 task_coalition_roles(parent_task, coal_roles);
1898 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1899 }
1900 } else {
1901 coalitions_adopt_corpse_task(new_task);
1902 }
1903
1904 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1905 panic("created task is not a member of a resource coalition");
1906 }
1907 task_set_coalition_member(new_task);
1908 #endif /* CONFIG_COALITIONS */
1909
1910 new_task->dispatchqueue_offset = 0;
1911 if (parent_task != NULL) {
1912 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1913 }
1914
1915 new_task->task_can_transfer_memory_ownership = FALSE;
1916 new_task->task_volatile_objects = 0;
1917 new_task->task_nonvolatile_objects = 0;
1918 new_task->task_objects_disowning = FALSE;
1919 new_task->task_objects_disowned = FALSE;
1920 new_task->task_owned_objects = 0;
1921 queue_init(&new_task->task_objq);
1922
1923 #if CONFIG_FREEZE
1924 queue_init(&new_task->task_frozen_cseg_q);
1925 #endif /* CONFIG_FREEZE */
1926
1927 task_objq_lock_init(new_task);
1928
1929 #if __arm64__
1930 new_task->task_legacy_footprint = FALSE;
1931 new_task->task_extra_footprint_limit = FALSE;
1932 new_task->task_ios13extended_footprint_limit = FALSE;
1933 #endif /* __arm64__ */
1934 new_task->task_region_footprint = FALSE;
1935 new_task->task_has_crossed_thread_limit = FALSE;
1936 new_task->task_thread_limit = 0;
1937 #if CONFIG_SECLUDED_MEMORY
1938 new_task->task_can_use_secluded_mem = FALSE;
1939 new_task->task_could_use_secluded_mem = FALSE;
1940 new_task->task_could_also_use_secluded_mem = FALSE;
1941 new_task->task_suppressed_secluded = FALSE;
1942 #endif /* CONFIG_SECLUDED_MEMORY */
1943
1944 /*
1945 * t_flags is set up above. But since we don't
1946 * support darkwake mode being set that way
1947 * currently, we clear it out here explicitly.
1948 */
1949 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1950
1951 queue_init(&new_task->io_user_clients);
1952 new_task->loadTag = 0;
1953
1954 lck_mtx_lock(&tasks_threads_lock);
1955 queue_enter(&tasks, new_task, task_t, tasks);
1956 tasks_count++;
1957 if (tasks_suspend_state) {
1958 task_suspend_internal(new_task);
1959 }
1960 lck_mtx_unlock(&tasks_threads_lock);
1961 task_ref_hold_proc_task_struct(new_task);
1962
1963 return KERN_SUCCESS;
1964 }
1965
/*
 * task_rollup_accounting_info
 *
 * Roll up accounting stats. Used to rollup stats
 * for exec copy task and corpse fork.
 *
 * Copies or accumulates scheduling, fault, IPC, memory-pressure and
 * I/O accounting from from_task into to_task, then rolls up the
 * non-memory ledger entries.  The two tasks must be distinct.
 */
void
task_rollup_accounting_info(task_t to_task, task_t from_task)
{
	assert(from_task != to_task);

	/* CPU-time accounting (recount) is copied wholesale. */
	recount_task_copy(&to_task->tk_recount, &from_task->tk_recount);
	to_task->total_runnable_time = from_task->total_runnable_time;
	/* Per-task counters are additive: fold from_task's totals in. */
	counter_add(&to_task->faults, counter_load(&from_task->faults));
	counter_add(&to_task->pageins, counter_load(&from_task->pageins));
	counter_add(&to_task->cow_faults, counter_load(&from_task->cow_faults));
	counter_add(&to_task->messages_sent, counter_load(&from_task->messages_sent));
	counter_add(&to_task->messages_received, counter_load(&from_task->messages_received));
	/* Plain scalar statistics are overwritten with the source values. */
	to_task->decompressions = from_task->decompressions;
	to_task->syscalls_mach = from_task->syscalls_mach;
	to_task->syscalls_unix = from_task->syscalls_unix;
	to_task->c_switch = from_task->c_switch;
	to_task->p_switch = from_task->p_switch;
	to_task->ps_switch = from_task->ps_switch;
	to_task->extmod_statistics = from_task->extmod_statistics;
	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
	to_task->purged_memory_warn = from_task->purged_memory_warn;
	to_task->purged_memory_critical = from_task->purged_memory_critical;
	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
	/* Struct copy through the pointer: both tasks own their io_stats allocation. */
	*to_task->task_io_stats = *from_task->task_io_stats;
	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
	to_task->task_gpu_ns = from_task->task_gpu_ns;
	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
#if CONFIG_PHYS_WRITE_ACCT
	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */

#if CONFIG_MEMORYSTATUS
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.memorystatus_dirty_time);
#endif /* CONFIG_MEMORYSTATUS */

	/* Skip ledger roll up for memory accounting entries */
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
#if CONFIG_SCHED_SFI
	/* One SFI wait-time ledger entry per scheduling class. */
	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
	}
#endif
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
}
2034
/*
 * task_deallocate_internal:
 *
 * Drop a reference on a task.
 * Don't call this directly.
 *
 * Invoked with the post-decrement reference count in `refs`; full
 * teardown runs only once the count reaches zero.
 */
extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
void
task_deallocate_internal(
	task_t task,
	os_ref_count_t refs)
{
	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;

	if (task == TASK_NULL) {
		return;
	}

#if IMPORTANCE_INHERITANCE
	if (refs == 1) {
		/*
		 * If last ref potentially comes from the task's importance,
		 * disconnect it. But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
		 */
		if (IIT_NULL != task->task_imp_base) {
			ipc_importance_disconnect_task(task);
		}
		return;
	}
#endif /* IMPORTANCE_INHERITANCE */

	/* Not the last reference: nothing to tear down yet. */
	if (refs > 0) {
		return;
	}

	/*
	 * The task should be dead at this point. Ensure other resources
	 * like threads, are gone before we trash the world.
	 */
	assert(queue_empty(&task->threads));
	assert(get_bsdtask_info(task) == NULL);
	assert(!is_active(task->itk_space));
	assert(!task->active);
	assert(task->active_thread_count == 0);
	assert(!task_get_game_mode(task));

	/* Unlink from the global terminated-tasks list. */
	lck_mtx_lock(&tasks_threads_lock);
	assert(terminated_tasks_count > 0);
	queue_remove(&terminated_tasks, task, task_t, tasks);
	terminated_tasks_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * remove the reference on bank context
	 */
	task_bank_reset(task);

	/* Release the I/O statistics block allocated at task creation. */
	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));

	/*
	 * Give the machine dependent code a chance
	 * to perform cleanup before ripping apart
	 * the task.
	 */
	machine_task_terminate(task);

	ipc_task_terminate(task);

	/* let iokit know 2 */
	iokit_task_terminate(task, 2);

	/* Unregister task from userspace coredumps on panic */
	kern_unregister_userspace_coredump(task);

	if (task->affinity_space) {
		task_affinity_deallocate(task);
	}

#if MACH_ASSERT
	/* Sanity: the task and its pmap must agree on the ledger. */
	if (task->ledger != NULL &&
	    task->map != NULL &&
	    task->map->pmap != NULL &&
	    task->map->pmap->ledger != NULL) {
		assert(task->ledger == task->map->pmap->ledger);
	}
#endif /* MACH_ASSERT */

	vm_owned_objects_disown(task);
	assert(task->task_objects_disowned);
	if (task->task_owned_objects != 0) {
		panic("task_deallocate(%p): "
		    "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
		    task,
		    task->task_volatile_objects,
		    task->task_nonvolatile_objects,
		    task->task_owned_objects);
	}

#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_deallocate(task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	vm_map_deallocate(task->map);
	if (task->is_large_corpse) {
		assert(large_corpse_count > 0);
		OSDecrementAtomic(&large_corpse_count);
		task->is_large_corpse = false;
	}
	is_release(task->itk_space);

	if (task->t_rr_ranges) {
		restartable_ranges_release(task->t_rr_ranges);
	}

	/* Snapshot wakeup ledger entries before the ledger is dropped below. */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    &interrupt_wakeups, &debit);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    &platform_idle_wakeups, &debit);

#if defined(CONFIG_SCHED_MULTIQ)
	sched_group_destroy(task->sched_group);
#endif

	/* Read final CPU-time totals before recount state is deinitialized. */
	struct recount_times_mach sum = { 0 };
	struct recount_times_mach p_only = { 0 };
	recount_task_times_perf_only(task, &sum, &p_only);
#if CONFIG_PERVASIVE_ENERGY
	uint64_t energy = recount_task_energy_nj(task);
#endif /* CONFIG_PERVASIVE_ENERGY */
	recount_task_deinit(&task->tk_recount);

	/* Accumulate statistics for dead tasks */
	lck_spin_lock(&dead_task_statistics_lock);
	dead_task_statistics.total_user_time += sum.rtm_user;
	dead_task_statistics.total_system_time += sum.rtm_system;

	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;

	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
	dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
	dead_task_statistics.total_pset_switches += task->ps_switch;
	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
#if CONFIG_PERVASIVE_ENERGY
	dead_task_statistics.task_energy += energy;
#endif /* CONFIG_PERVASIVE_ENERGY */

	lck_spin_unlock(&dead_task_statistics_lock);
	lck_mtx_destroy(&task->lock, &task_lck_grp);

	/* Fold the task's kalloc ledger totals into the global tallies. */
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
	}
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
	}
	ledger_dereference(task->ledger);

	counter_free(&task->faults);
	counter_free(&task->pageins);
	counter_free(&task->cow_faults);
	counter_free(&task->messages_sent);
	counter_free(&task->messages_received);

#if CONFIG_COALITIONS
	task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */

	bzero(task->coalition, sizeof(task->coalition));

#if MACH_BSD
	/* clean up collected information since last reference to task is gone */
	if (task->corpse_info) {
		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
		task_crashinfo_destroy(task->corpse_info);
		task->corpse_info = NULL;
		kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
	}
#endif

#if CONFIG_MACF
	if (get_task_crash_label(task)) {
		mac_exc_free_label(get_task_crash_label(task));
		set_task_crash_label(task, NULL);
	}
#endif

	assert(queue_empty(&task->task_objq));
	task_objq_lock_destroy(task);

	if (task->corpse_vmobject_list) {
		kfree_data(task->corpse_vmobject_list,
		    (vm_size_t)task->corpse_vmobject_list_size);
	}

	/* Finally release the task/proc storage itself. */
	task_ref_count_fini(task);
	proc_ro_erase_task(task->bsd_info_ro);
	task_release_proc_task_struct(task, task->bsd_info_ro);
}
2245
2246 /*
2247 * task_name_deallocate_mig:
2248 *
2249 * Drop a reference on a task name.
2250 */
2251 void
task_name_deallocate_mig(task_name_t task_name)2252 task_name_deallocate_mig(
2253 task_name_t task_name)
2254 {
2255 return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2256 }
2257
2258 /*
2259 * task_policy_set_deallocate_mig:
2260 *
2261 * Drop a reference on a task type.
2262 */
2263 void
task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)2264 task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2265 {
2266 return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2267 }
2268
2269 /*
2270 * task_policy_get_deallocate_mig:
2271 *
2272 * Drop a reference on a task type.
2273 */
2274 void
task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)2275 task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2276 {
2277 return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2278 }
2279
2280 /*
2281 * task_inspect_deallocate_mig:
2282 *
2283 * Drop a task inspection reference.
2284 */
2285 void
task_inspect_deallocate_mig(task_inspect_t task_inspect)2286 task_inspect_deallocate_mig(
2287 task_inspect_t task_inspect)
2288 {
2289 return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2290 }
2291
2292 /*
2293 * task_read_deallocate_mig:
2294 *
2295 * Drop a reference on task read port.
2296 */
2297 void
task_read_deallocate_mig(task_read_t task_read)2298 task_read_deallocate_mig(
2299 task_read_t task_read)
2300 {
2301 return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2302 }
2303
2304 /*
2305 * task_suspension_token_deallocate:
2306 *
2307 * Drop a reference on a task suspension token.
2308 */
2309 void
task_suspension_token_deallocate(task_suspension_token_t token)2310 task_suspension_token_deallocate(
2311 task_suspension_token_t token)
2312 {
2313 return task_deallocate((task_t)token);
2314 }
2315
2316 void
task_suspension_token_deallocate_grp(task_suspension_token_t token,task_grp_t grp)2317 task_suspension_token_deallocate_grp(
2318 task_suspension_token_t token,
2319 task_grp_t grp)
2320 {
2321 return task_deallocate_grp((task_t)token, grp);
2322 }
2323
2324 /*
2325 * task_collect_crash_info:
2326 *
2327 * collect crash info from bsd and mach based data
2328 */
2329 kern_return_t
task_collect_crash_info(task_t task,struct label * crash_label,int is_corpse_fork)2330 task_collect_crash_info(
2331 task_t task,
2332 #ifdef CONFIG_MACF
2333 struct label *crash_label,
2334 #endif
2335 int is_corpse_fork)
2336 {
2337 kern_return_t kr = KERN_SUCCESS;
2338
2339 kcdata_descriptor_t crash_data = NULL;
2340 kcdata_descriptor_t crash_data_release = NULL;
2341 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2342 mach_vm_offset_t crash_data_ptr = 0;
2343 void *crash_data_kernel = NULL;
2344 void *crash_data_kernel_release = NULL;
2345 #if CONFIG_MACF
2346 struct label *label, *free_label;
2347 #endif
2348
2349 if (!corpses_enabled()) {
2350 return KERN_NOT_SUPPORTED;
2351 }
2352
2353 #if CONFIG_MACF
2354 free_label = label = mac_exc_create_label(NULL);
2355 #endif
2356
2357 task_lock(task);
2358
2359 assert(is_corpse_fork || get_bsdtask_info(task) != NULL);
2360 if (task->corpse_info == NULL && (is_corpse_fork || get_bsdtask_info(task) != NULL)) {
2361 #if CONFIG_MACF
2362 /* Set the crash label, used by the exception delivery mac hook */
2363 free_label = get_task_crash_label(task); // Most likely NULL.
2364 set_task_crash_label(task, label);
2365 mac_exc_update_task_crash_label(task, crash_label);
2366 #endif
2367 task_unlock(task);
2368
2369 crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2370 Z_WAITOK | Z_ZERO);
2371 if (crash_data_kernel == NULL) {
2372 kr = KERN_RESOURCE_SHORTAGE;
2373 goto out_no_lock;
2374 }
2375 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2376
2377 /* Do not get a corpse ref for corpse fork */
2378 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2379 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2380 KCFLAG_USE_MEMCOPY);
2381 if (crash_data) {
2382 task_lock(task);
2383 crash_data_release = task->corpse_info;
2384 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2385 task->corpse_info = crash_data;
2386
2387 task_unlock(task);
2388 kr = KERN_SUCCESS;
2389 } else {
2390 kfree_data(crash_data_kernel,
2391 CORPSEINFO_ALLOCATION_SIZE);
2392 kr = KERN_FAILURE;
2393 }
2394
2395 if (crash_data_release != NULL) {
2396 task_crashinfo_destroy(crash_data_release);
2397 }
2398 kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2399 } else {
2400 task_unlock(task);
2401 }
2402
2403 out_no_lock:
2404 #if CONFIG_MACF
2405 if (free_label != NULL) {
2406 mac_exc_free_label(free_label);
2407 }
2408 #endif
2409 return kr;
2410 }
2411
/*
 * task_deliver_crash_notification:
 *
 * Makes outcall to registered host port for a corpse.
 *
 * Builds the EXC_CORPSE_NOTIFY code pair from etype/subcode, resets
 * IPC state on the corpse's non-duplicate threads, arms the no-sender
 * notification on the corpse port, and delivers the exception
 * uninterruptibly via exception_triage_thread().
 */
kern_return_t
task_deliver_crash_notification(
	task_t corpse, /* corpse or corpse fork */
	thread_t thread,
	exception_type_t etype,
	mach_exception_subcode_t subcode)
{
	kcdata_descriptor_t crash_info = corpse->corpse_info;
	thread_t th_iter = NULL;
	kern_return_t kr = KERN_SUCCESS;
	wait_interrupt_t wsave;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	ipc_port_t corpse_port;

	/* No collected crash info means there is nothing to deliver. */
	if (crash_info == NULL) {
		return KERN_FAILURE;
	}

	assert(task_is_a_corpse(corpse));

	task_lock(corpse);

	/*
	 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
	 * Crash reporters should derive whether it's fatal from corpse blob.
	 */
	code[0] = etype;
	code[1] = subcode;

	/* Reset IPC state on every thread that is not a corpse duplicate. */
	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
	{
		if (th_iter->corpse_dup == FALSE) {
			ipc_thread_reset(th_iter);
		}
	}
	task_unlock(corpse);

	/* Arm the no-sender notification for taskport */
	task_reference(corpse);
	corpse_port = convert_corpse_to_port_and_nsrequest(corpse);

	/* Deliver uninterruptibly; restore the previous interrupt level after. */
	wsave = thread_interrupt_level(THREAD_UNINT);
	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
	if (kr != KERN_SUCCESS) {
		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(corpse));
	}

	(void)thread_interrupt_level(wsave);

	/*
	 * Drop the send right on corpse port, will fire the
	 * no-sender notification if exception deliver failed.
	 */
	ipc_port_release_send(corpse_port);
	return kr;
}
2473
2474 /*
2475 * task_terminate:
2476 *
2477 * Terminate the specified task. See comments on thread_terminate
2478 * (kern/thread.c) about problems with terminating the "current task."
2479 */
2480
2481 kern_return_t
task_terminate(task_t task)2482 task_terminate(
2483 task_t task)
2484 {
2485 if (task == TASK_NULL) {
2486 return KERN_INVALID_ARGUMENT;
2487 }
2488
2489 if (get_bsdtask_info(task)) {
2490 return KERN_FAILURE;
2491 }
2492
2493 return task_terminate_internal(task);
2494 }
2495
2496 #if MACH_ASSERT
2497 extern int proc_pid(struct proc *);
2498 extern void proc_name_kdp(struct proc *p, char *buf, int size);
2499 #endif /* MACH_ASSERT */
2500
#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
/*
 * task_partial_reap:
 *
 * Reclaim what can be reclaimed of the task's address space via
 * vm_map_partial_reap(), bracketing the work with VM_MAP_PARTIAL_REAP
 * kdebug tracepoints that record the task's physical footprint (in
 * pages) before, and the resident/compressed page counts reclaimed
 * after.
 */
static void
__unused task_partial_reap(task_t task, __unused int pid)
{
	unsigned int reclaimed_resident = 0;
	unsigned int reclaimed_compressed = 0;
	uint64_t task_page_count;

	/* Footprint in pages; used only as a tracepoint argument. */
	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
	    pid, task_page_count, 0, 0, 0);

	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
	    pid, reclaimed_resident, reclaimed_compressed, 0, 0);
}
2519
2520 /*
2521 * task_mark_corpse:
2522 *
2523 * Mark the task as a corpse. Called by crashing thread.
2524 */
2525 kern_return_t
task_mark_corpse(task_t task)2526 task_mark_corpse(task_t task)
2527 {
2528 kern_return_t kr = KERN_SUCCESS;
2529 thread_t self_thread;
2530 (void) self_thread;
2531 wait_interrupt_t wsave;
2532 #if CONFIG_MACF
2533 struct label *crash_label = NULL;
2534 #endif
2535
2536 assert(task != kernel_task);
2537 assert(task == current_task());
2538 assert(!task_is_a_corpse(task));
2539
2540 #if CONFIG_MACF
2541 crash_label = mac_exc_create_label_for_proc((struct proc*)get_bsdtask_info(task));
2542 #endif
2543
2544 kr = task_collect_crash_info(task,
2545 #if CONFIG_MACF
2546 crash_label,
2547 #endif
2548 FALSE);
2549 if (kr != KERN_SUCCESS) {
2550 goto out;
2551 }
2552
2553 self_thread = current_thread();
2554
2555 wsave = thread_interrupt_level(THREAD_UNINT);
2556 task_lock(task);
2557
2558 /*
2559 * Check if any other thread called task_terminate_internal
2560 * and made the task inactive before we could mark it for
2561 * corpse pending report. Bail out if the task is inactive.
2562 */
2563 if (!task->active) {
2564 kcdata_descriptor_t crash_data_release = task->corpse_info;;
2565 void *crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);;
2566
2567 task->corpse_info = NULL;
2568 task_unlock(task);
2569
2570 if (crash_data_release != NULL) {
2571 task_crashinfo_destroy(crash_data_release);
2572 }
2573 kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2574 return KERN_TERMINATED;
2575 }
2576
2577 task_set_corpse_pending_report(task);
2578 task_set_corpse(task);
2579 task->crashed_thread_id = thread_tid(self_thread);
2580
2581 kr = task_start_halt_locked(task, TRUE);
2582 assert(kr == KERN_SUCCESS);
2583
2584 task_set_uniqueid(task);
2585
2586 task_unlock(task);
2587
2588 /*
2589 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2590 * disable old ports here instead.
2591 *
2592 * The vm_map and ipc_space must exist until this function returns,
2593 * convert_port_to_{map,space}_with_flavor relies on this behavior.
2594 */
2595 ipc_task_disable(task);
2596
2597 /* let iokit know 1 */
2598 iokit_task_terminate(task, 1);
2599
2600 /* terminate the ipc space */
2601 ipc_space_terminate(task->itk_space);
2602
2603 /* Add it to global corpse task list */
2604 task_add_to_corpse_task_list(task);
2605
2606 thread_terminate_internal(self_thread);
2607
2608 (void) thread_interrupt_level(wsave);
2609 assert(task->halting == TRUE);
2610
2611 out:
2612 #if CONFIG_MACF
2613 mac_exc_free_label(crash_label);
2614 #endif
2615 return kr;
2616 }
2617
2618 /*
2619 * task_set_uniqueid
2620 *
2621 * Set task uniqueid to systemwide unique 64 bit value
2622 */
2623 void
task_set_uniqueid(task_t task)2624 task_set_uniqueid(task_t task)
2625 {
2626 task->task_uniqueid = OSIncrementAtomic64(&next_taskuniqueid);
2627 }
2628
2629 /*
2630 * task_clear_corpse
2631 *
2632 * Clears the corpse pending bit on task.
2633 * Removes inspection bit on the threads.
2634 */
2635 void
task_clear_corpse(task_t task)2636 task_clear_corpse(task_t task)
2637 {
2638 thread_t th_iter = NULL;
2639
2640 task_lock(task);
2641 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2642 {
2643 thread_mtx_lock(th_iter);
2644 th_iter->inspection = FALSE;
2645 ipc_thread_disable(th_iter);
2646 thread_mtx_unlock(th_iter);
2647 }
2648
2649 thread_terminate_crashed_threads();
2650 /* remove the pending corpse report flag */
2651 task_clear_corpse_pending_report(task);
2652
2653 task_unlock(task);
2654 }
2655
2656 /*
2657 * task_port_no_senders
2658 *
2659 * Called whenever the Mach port system detects no-senders on
2660 * the task port of a corpse.
2661 * Each notification that comes in should terminate the task (corpse).
2662 */
static void
task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	/* Resolve the corpse task backing this kobject port. */
	task_t task = ipc_kobject_get_locked(port, IKOT_TASK_CONTROL);

	assert(task != TASK_NULL);
	assert(task_is_a_corpse(task));

	/* Remove the task from global corpse task list */
	task_remove_from_corpse_task_list(task);

	/* Clear per-thread inspection state and the pending-report flag. */
	task_clear_corpse(task);
	vm_map_unset_corpse_source(task->map);
	/* Finally tear the corpse task down for real. */
	task_terminate_internal(task);
}
2678
2679 /*
2680 * task_port_with_flavor_no_senders
2681 *
2682 * Called whenever the Mach port system detects no-senders on
2683 * the task inspect or read port. These ports are allocated lazily and
2684 * should be deallocated here when there are no senders remaining.
2685 */
static void
task_port_with_flavor_no_senders(
	ipc_port_t          port,
	mach_port_mscount_t mscount __unused)
{
	task_t task;
	mach_task_flavor_t flavor;
	ipc_kobject_type_t kotype;

	ip_mq_lock(port);
	if (port->ip_srights > 0) {
		/* A send right re-appeared since the notification fired. */
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
	task = ipc_kobject_get_locked(port, kotype);
	if (task != TASK_NULL) {
		/* Hold a ref across the unlocked window below. */
		task_reference(task);
	}
	ip_mq_unlock(port);

	if (task == TASK_NULL) {
		/* The task is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	/* Map the kobject type back to the lazily-allocated port slot. */
	if (kotype == IKOT_TASK_READ) {
		flavor = TASK_FLAVOR_READ;
	} else {
		flavor = TASK_FLAVOR_INSPECT;
	}

	/* Re-take both locks in task-then-port order and re-validate. */
	itk_lock(task);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_task_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	if (!ip_active(port) ||
	    task->itk_task_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		itk_unlock(task);
		task_deallocate(task);
		return;
	}

	/* We won the race: clear the slot and destroy the port. */
	assert(task->itk_task_ports[flavor] == port);
	task->itk_task_ports[flavor] = IP_NULL;
	itk_unlock(task);

	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	task_deallocate(task);
}
2752
2753 /*
2754 * task_wait_till_threads_terminate_locked
2755 *
2756 * Wait till all the threads in the task are terminated.
2757 * Might release the task lock and re-acquire it.
2758 */
void
task_wait_till_threads_terminate_locked(task_t task)
{
	/* wait for all the threads in the task to terminate */
	while (task->active_thread_count != 0) {
		/*
		 * Sleep uninterruptibly on the count; the task lock is
		 * dropped across the block and re-taken afterwards, so
		 * the count is re-checked under the lock on each pass.
		 */
		assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
}
2771
2772 /*
2773 * task_duplicate_map_and_threads
2774 *
2775 * Copy vmmap of source task.
2776 * Copy active threads from source task to destination task.
2777 * Source task would be suspended during the copy.
2778 */
kern_return_t
task_duplicate_map_and_threads(
	task_t          task,
	void            *p,
	task_t          new_task,
	thread_t        *thread_ret,
	uint64_t        **udata_buffer,
	int             *size,
	int             *num_udata,
	bool            for_exception)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state, use the internal
	 * variant so that no user-space process can resume
	 * the task from under us
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		ktriage_record(thread_tid(self), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_CORPSE, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_CORPSE_FAIL_LIBGMALLOC), 0 /* arg */);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	bool is_large = false;
	if (memorystatus_allowed_vm_map_fork(task, &is_large)) {
		/* Setup new task's vmmap, switch from parent task's map to it COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		if (new_task->map) {
			/* Fork succeeded: drop the map the new task came with. */
			new_task->is_large_corpse = is_large;
			vm_map_deallocate(oldmap);

			/* copy ledgers that impact the memory footprint */
			vm_map_copy_footprint_ledgers(task, new_task);

			/* Get all the udata pointers from kqueue */
			est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
			if (est_knotes > 0) {
				/* Pad the estimate; knotes may be added concurrently. */
				buf_size = (est_knotes + 32) * sizeof(uint64_t);
				buffer = kalloc_data(buf_size, Z_WAITOK);
				num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
				if (num_knotes > est_knotes + 32) {
					num_knotes = est_knotes + 32;
				}
			}
		} else {
			/* Fork failed: undo the large-corpse accounting and bail. */
			if (is_large) {
				assert(large_corpse_count > 0);
				OSDecrementAtomic(&large_corpse_count);
			}
			new_task->map = oldmap;
#if DEVELOPMENT || DEBUG
			memorystatus_abort_vm_map_fork(task);
#endif
			task_resume_internal(task);
			return KERN_NO_SPACE;
		}
	} else if (!for_exception) {
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_NO_SPACE;
	}

	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		/* Nothing to duplicate; release the udata buffer if allocated. */
		kfree_data(buffer, buf_size);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		/* The snapshot array is bounded by the count taken above. */
		if (array_count >= active_thread_count) {
			break;
		}

		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	for (i = 0; i < array_count; i++) {
		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of current thread in corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			/* Mark the failed duplicate so later passes can skip it. */
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(get_bsdthread_info(new_thread),
		    get_bsdthread_info(thread_array[i]));
		new_thread->thread_tag = thread_array[i]->thread_tag &
		    ~THREAD_TAG_USER_JOIN;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	/* Drop the refs taken during the snapshot pass. */
	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree_type(thread_t, active_thread_count, thread_array);

	if (kr == KERN_SUCCESS) {
		/* Ownership of thread_return and buffer passes to the caller. */
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		/* Failure: release everything we would have handed back. */
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		kfree_data(buffer, buf_size);
	}

	return kr;
}
2966
2967 #if CONFIG_SECLUDED_MEMORY
2968 extern void task_set_can_use_secluded_mem_locked(
2969 task_t task,
2970 boolean_t can_use_secluded_mem);
2971 #endif /* CONFIG_SECLUDED_MEMORY */
2972
2973 #if MACH_ASSERT
2974 int debug4k_panic_on_terminate = 0;
2975 #endif /* MACH_ASSERT */
kern_return_t
task_terminate_internal(
	task_t          task)
{
	thread_t thread, self;
	task_t self_task;
	boolean_t interrupt_save;
	int pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = current_task();

	/*
	 * Get the task locked and make sure that we are not racing
	 * with someone else trying to terminate us.
	 */
	if (task == self_task) {
		task_lock(task);
	} else if (task < self_task) {
		/* Lock both tasks in address order to avoid deadlock. */
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	/* The task is dying: revoke any claim it has on secluded memory. */
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 * Task is already being terminated.
		 * Just return an error. If we are dying, this will
		 * just get us to our AST special handler and that
		 * will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (task_corpse_pending_report(task)) {
		/*
		 * Task is marked for reporting as corpse.
		 * Just return an error. This will
		 * just get us to our AST special handler and that
		 * will get us to finish the path to death
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (self_task != task) {
		task_unlock(self_task);
	}

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 * Indicate that we want all the threads to stop executing
	 * at user space by holding the task (we would have held
	 * each thread independently in thread_terminate_internal -
	 * but this way we may be more likely to already find it
	 * held there). Mark the task inactive, and prevent
	 * further task operations via the task port.
	 *
	 * The vm_map and ipc_space must exist until this function returns,
	 * convert_port_to_{map,space}_with_flavor relies on this behavior.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_EXCLAVES
	task_stop_conclave(task, false);
#endif /* CONFIG_EXCLAVES */

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 * Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	/* Record the pid before the proc linkage can go away. */
	void *bsd_info = get_bsdtask_info(task);
	if (bsd_info != NULL) {
		pid = proc_pid(bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

// PR-17045188: Revisit implementation
//      task_partial_reap(task, pid);

#if CONFIG_TASKWATCH
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_TASKWATCH */

	/*
	 * Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 * Clear the watchport boost on the task.
	 */
	task_remove_turnstile_watchports(task);

	/* let iokit know 1 */
	iokit_task_terminate(task, 1);

	/*
	 * Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

#if CONFIG_DEFERRED_RECLAIM
	/*
	 * Remove this tasks reclaim buffer from global queues.
	 */
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_uninstall(task->deferred_reclamation_metadata);
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped. To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explictly here.
	 */

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	void *proc = get_bsdtask_info(task);
	if (proc) {
		pid = proc_pid(proc);
		proc_name_kdp(proc, procname, sizeof(procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof(procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
		DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
		if (debug4k_panic_on_terminate) {
			panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
		}
	}
#endif /* MACH_ASSERT */

	vm_map_terminate(task->map);

	/* release our shared region */
	vm_shared_region_set(task, NULL);

#if __has_feature(ptrauth_calls)
	task_set_shared_region_id(task, NULL);
#endif /* __has_feature(ptrauth_calls) */

	/* Move the task from the live list to the terminated list. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if CONFIG_CPU_COUNTERS
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
		kpc_force_all_ctrs(task, 0);
	}
#endif /* CONFIG_CPU_COUNTERS */

#if CONFIG_COALITIONS
	/*
	 * Leave the coalition for corpse task or task that
	 * never had any active threads (e.g. fork, exec failure).
	 * For task with active threads, the task will be removed
	 * from coalition by last terminating thread.
	 */
	if (task->active_thread_count == 0) {
		coalitions_remove_task(task);
	}
#endif

#if CONFIG_FREEZE
	extern int vm_compressor_available;
	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
		task_disown_frozen_csegs(task);
		assert(queue_empty(&task->task_frozen_cseg_q));
	}
#endif /* CONFIG_FREEZE */


	/*
	 * Get rid of the task active reference on itself.
	 */
	task_deallocate_grp(task, TASK_GRP_INTERNAL);

	return KERN_SUCCESS;
}
3244
3245 void
tasks_system_suspend(boolean_t suspend)3246 tasks_system_suspend(boolean_t suspend)
3247 {
3248 task_t task;
3249
3250 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
3251 (suspend ? DBG_FUNC_START : DBG_FUNC_END));
3252
3253 lck_mtx_lock(&tasks_threads_lock);
3254 assert(tasks_suspend_state != suspend);
3255 tasks_suspend_state = suspend;
3256 queue_iterate(&tasks, task, task_t, tasks) {
3257 if (task == kernel_task) {
3258 continue;
3259 }
3260 suspend ? task_suspend_internal(task) : task_resume_internal(task);
3261 }
3262 lck_mtx_unlock(&tasks_threads_lock);
3263 }
3264
3265 /*
3266 * task_start_halt:
3267 *
3268 * Shut the current task down (except for the current thread) in
3269 * preparation for dramatic changes to the task (probably exec).
3270 * We hold the task and mark all other threads in the task for
3271 * termination.
3272 */
3273 kern_return_t
task_start_halt(task_t task)3274 task_start_halt(task_t task)
3275 {
3276 kern_return_t kr = KERN_SUCCESS;
3277 task_lock(task);
3278 kr = task_start_halt_locked(task, FALSE);
3279 task_unlock(task);
3280 return kr;
3281 }
3282
static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	/* Only the caller's own task (or a corpse fork) may be halted here. */
	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!should_mark_corpse &&
	    (task->halting || !task->active || !self->active)) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves. If should_mark_corpse is set, the corpse
		 * creation might have raced with exec, let the corpse
		 * creation continue, once the current thread reaches AST
		 * thread in exec will be woken up from task_complete_halt.
		 * Exec will fail cause the proc was marked for exit.
		 * Once the thread in exec reaches AST, it will call proc_exit
		 * and deliver the EXC_CORPSE_NOTIFY.
		 */
		return KERN_FAILURE;
	}

	/* Thread creation will fail after this point of no return. */
	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);

#if CONFIG_EXCLAVES
	if (should_mark_corpse) {
		/*
		 * Record per-thread exclaves RPC state into the crash
		 * report before stopping the conclave.
		 */
		void *crash_info_ptr = task_get_corpseinfo(task);
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (crash_info_ptr != NULL && thread->th_exclaves_ipc_buffer != NULL) {
				struct thread_crash_exclaves_info info = { 0 };

				info.tcei_flags = kExclaveRPCActive;
				info.tcei_scid = thread->th_exclaves_scheduling_context_id;
				info.tcei_thread_id = thread->thread_id;

				kcdata_push_data(crash_info_ptr,
				    STACKSHOT_KCTYPE_KERN_EXCLAVES_CRASH_THREADINFO,
				    sizeof(struct thread_crash_exclaves_info), &info);
			}
		}

		/* task_stop_conclave may block: drop the task lock around it. */
		task_unlock(task);
		task_stop_conclave(task, true);
		task_lock(task);
	}
#endif /* CONFIG_EXCLAVES */

	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));
	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		/*
		 * Remove priority throttles for threads to terminate timely. This has
		 * to be done after task_hold_locked() traps all threads to AST, but before
		 * threads are marked inactive in thread_terminate_internal(). Takes thread
		 * mutex lock.
		 *
		 * We need task_is_a_corpse() check so that we don't accidently update policy
		 * for tasks that are doing posix_spawn().
		 *
		 * See: thread_policy_update_tasklocked().
		 */
		if (task_is_a_corpse(task)) {
			proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
		}

		if (should_mark_corpse) {
			/* Mark the thread inspectable for the corpse report. */
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		if (thread != self) {
			thread_terminate_internal(thread);
		}
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	/* Drop the hold taken above; the threads are marked for death now. */
	task_release_locked(task);

	return KERN_SUCCESS;
}
3385
3386
3387 /*
3388 * task_complete_halt:
3389 *
3390 * Complete task halt by waiting for threads to terminate, then clean
3391 * up task resources (VM, port namespace, etc...) and then let the
3392 * current thread go in the (practically empty) task context.
3393 *
3394 * Note: task->halting flag is not cleared in order to avoid creation
3395 * of new thread in old exec'ed task.
3396 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 * Wait for the other threads to get shut down.
	 * When the last other thread is reaped, we'll be
	 * woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		task_unlock(task);
	}

#if CONFIG_DEFERRED_RECLAIM
	/* Detach and free the deferred-reclaim buffer before the map dies. */
	if (task->deferred_reclamation_metadata) {
		vm_deferred_reclamation_buffer_uninstall(
			task->deferred_reclamation_metadata);
		vm_deferred_reclamation_buffer_deallocate(
			task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	/*
	 * Give the machine dependent code a chance
	 * to perform cleanup of task-level resources
	 * associated with the current thread before
	 * ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 * Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/* let iokit know 1 */
	iokit_task_terminate(task, 1);

	/*
	 * Terminate the IPC space. A long time ago,
	 * this used to be ipc_space_clean() which would
	 * keep the space active but hollow it.
	 *
	 * We really do not need this semantics given
	 * tasks die with exec now.
	 */
	ipc_space_terminate(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_terminate(task->map);

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	/* let iokit know 2 */
	iokit_task_terminate(task, 2);
}
3466
3467 #ifdef CONFIG_TASK_SUSPEND_STATS
3468
3469 static void
_task_mark_suspend_source(task_t task)3470 _task_mark_suspend_source(task_t task)
3471 {
3472 int idx;
3473 task_suspend_stats_t stats;
3474 task_suspend_source_t source;
3475 task_lock_assert_owned(task);
3476 stats = &task->t_suspend_stats;
3477
3478 idx = stats->tss_count % TASK_SUSPEND_SOURCES_MAX;
3479 source = &task->t_suspend_sources[idx];
3480 bzero(source, sizeof(*source));
3481
3482 source->tss_time = mach_absolute_time();
3483 source->tss_tid = current_thread()->thread_id;
3484 source->tss_pid = task_pid(current_task());
3485 task_best_name(current_task(), source->tss_procname, sizeof(source->tss_procname));
3486
3487 stats->tss_count++;
3488 }
3489
3490 static inline void
_task_mark_suspend_start(task_t task)3491 _task_mark_suspend_start(task_t task)
3492 {
3493 task_lock_assert_owned(task);
3494 task->t_suspend_stats.tss_last_start = mach_absolute_time();
3495 }
3496
3497 static inline void
_task_mark_suspend_end(task_t task)3498 _task_mark_suspend_end(task_t task)
3499 {
3500 task_lock_assert_owned(task);
3501 task->t_suspend_stats.tss_last_end = mach_absolute_time();
3502 task->t_suspend_stats.tss_duration += (task->t_suspend_stats.tss_last_end -
3503 task->t_suspend_stats.tss_last_start);
3504 }
3505
3506 static kern_return_t
_task_get_suspend_stats_locked(task_t task,task_suspend_stats_t stats)3507 _task_get_suspend_stats_locked(task_t task, task_suspend_stats_t stats)
3508 {
3509 if (task == TASK_NULL || stats == NULL) {
3510 return KERN_INVALID_ARGUMENT;
3511 }
3512 task_lock_assert_owned(task);
3513 memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3514 return KERN_SUCCESS;
3515 }
3516
3517 static kern_return_t
_task_get_suspend_sources_locked(task_t task,task_suspend_source_t sources)3518 _task_get_suspend_sources_locked(task_t task, task_suspend_source_t sources)
3519 {
3520 if (task == TASK_NULL || sources == NULL) {
3521 return KERN_INVALID_ARGUMENT;
3522 }
3523 task_lock_assert_owned(task);
3524 memcpy(sources, task->t_suspend_sources,
3525 sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3526 return KERN_SUCCESS;
3527 }
3528
3529 #endif /* CONFIG_TASK_SUSPEND_STATS */
3530
3531 kern_return_t
task_get_suspend_stats(task_t task,task_suspend_stats_t stats)3532 task_get_suspend_stats(task_t task, task_suspend_stats_t stats)
3533 {
3534 #ifdef CONFIG_TASK_SUSPEND_STATS
3535 kern_return_t kr;
3536 if (task == TASK_NULL || stats == NULL) {
3537 return KERN_INVALID_ARGUMENT;
3538 }
3539 task_lock(task);
3540 kr = _task_get_suspend_stats_locked(task, stats);
3541 task_unlock(task);
3542 return kr;
3543 #else /* CONFIG_TASK_SUSPEND_STATS */
3544 (void)task;
3545 (void)stats;
3546 return KERN_NOT_SUPPORTED;
3547 #endif
3548 }
3549
3550 kern_return_t
task_get_suspend_stats_kdp(task_t task,task_suspend_stats_t stats)3551 task_get_suspend_stats_kdp(task_t task, task_suspend_stats_t stats)
3552 {
3553 #ifdef CONFIG_TASK_SUSPEND_STATS
3554 if (task == TASK_NULL || stats == NULL) {
3555 return KERN_INVALID_ARGUMENT;
3556 }
3557 memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3558 return KERN_SUCCESS;
3559 #else /* CONFIG_TASK_SUSPEND_STATS */
3560 #pragma unused(task, stats)
3561 return KERN_NOT_SUPPORTED;
3562 #endif /* CONFIG_TASK_SUSPEND_STATS */
3563 }
3564
3565 kern_return_t
task_get_suspend_sources(task_t task,task_suspend_source_array_t sources)3566 task_get_suspend_sources(task_t task, task_suspend_source_array_t sources)
3567 {
3568 #ifdef CONFIG_TASK_SUSPEND_STATS
3569 kern_return_t kr;
3570 if (task == TASK_NULL || sources == NULL) {
3571 return KERN_INVALID_ARGUMENT;
3572 }
3573 task_lock(task);
3574 kr = _task_get_suspend_sources_locked(task, sources);
3575 task_unlock(task);
3576 return kr;
3577 #else /* CONFIG_TASK_SUSPEND_STATS */
3578 (void)task;
3579 (void)sources;
3580 return KERN_NOT_SUPPORTED;
3581 #endif
3582 }
3583
3584 kern_return_t
task_get_suspend_sources_kdp(task_t task,task_suspend_source_array_t sources)3585 task_get_suspend_sources_kdp(task_t task, task_suspend_source_array_t sources)
3586 {
3587 #ifdef CONFIG_TASK_SUSPEND_STATS
3588 if (task == TASK_NULL || sources == NULL) {
3589 return KERN_INVALID_ARGUMENT;
3590 }
3591 memcpy(sources, task->t_suspend_sources,
3592 sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3593 return KERN_SUCCESS;
3594 #else /* CONFIG_TASK_SUSPEND_STATS */
3595 #pragma unused(task, sources)
3596 return KERN_NOT_SUPPORTED;
3597 #endif
3598 }
3599
3600 /*
3601 * task_hold_locked:
3602 *
3603 * Suspend execution of the specified task.
3604 * This is a recursive-style suspension of the task, a count of
3605 * suspends is maintained.
3606 *
3607 * CONDITIONS: the task is locked and active.
3608 */
void
task_hold_locked(
	task_t task)
{
	thread_t thread;
	void *bsd_info = get_bsdtask_info(task);

	assert(task->active);

	/* Holds nest: only the transition 0 -> 1 actually stops the threads. */
	if (task->suspend_count++ > 0) {
		return;
	}

	/* Tell the BSD workqueue subsystem the process is being suspended. */
	if (bsd_info) {
		workq_proc_suspended(bsd_info);
	}

	/*
	 * Iterate through all the threads and hold them.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_hold(thread);
		thread_mtx_unlock(thread);
	}

#ifdef CONFIG_TASK_SUSPEND_STATS
	/* Record when this suspension interval began (stats accounting). */
	_task_mark_suspend_start(task);
#endif
}
3639
3640 /*
3641 * task_hold_and_wait
3642 *
3643 * Same as the internal routine above, except that is must lock
3644 * and verify that the task is active. This differs from task_suspend
3645 * in that it places a kernel hold on the task rather than just a
3646 * user-level hold. This keeps users from over resuming and setting
3647 * it running out from under the kernel.
3648 *
3649 * CONDITIONS: the caller holds a reference on the task
3650 */
kern_return_t
task_hold_and_wait(
	task_t task)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);
	if (!task->active) {
		task_unlock(task);
		return KERN_FAILURE;
	}

#ifdef CONFIG_TASK_SUSPEND_STATS
	/* Attribute this suspension to the current caller for diagnostics. */
	_task_mark_suspend_source(task);
#endif /* CONFIG_TASK_SUSPEND_STATS */

	/* Place the kernel hold, then wait for the threads to stop. */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);
	task_unlock(task);

	return KERN_SUCCESS;
}
3675
3676 /*
3677 * task_wait_locked:
3678 *
3679 * Wait for all threads in task to stop.
3680 *
3681 * Conditions:
3682 * Called with task locked, active, and held.
3683 */
void
task_wait_locked(
	task_t task,
	boolean_t until_not_runnable)
{
	thread_t thread, self;

	assert(task->active);
	assert(task->suspend_count > 0);

	self = current_thread();

	/*
	 * Iterate through all the threads and wait for them to
	 * stop. Do not wait for the current thread if it is within
	 * the task.
	 */
	/*
	 * NOTE(review): until_not_runnable presumably asks thread_wait() to
	 * also wait until the thread is off-core, not merely held — confirm
	 * against thread_wait()'s contract.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != self) {
			thread_wait(thread, until_not_runnable);
		}
	}
}
3707
3708 boolean_t
task_is_app_suspended(task_t task)3709 task_is_app_suspended(task_t task)
3710 {
3711 return task->pidsuspended;
3712 }
3713
3714 /*
3715 * task_release_locked:
3716 *
3717 * Release a kernel hold on a task.
3718 *
3719 * CONDITIONS: the task is locked and active
3720 */
3721 void
task_release_locked(task_t task)3722 task_release_locked(
3723 task_t task)
3724 {
3725 thread_t thread;
3726 void *bsd_info = get_bsdtask_info(task);
3727
3728 assert(task->active);
3729 assert(task->suspend_count > 0);
3730
3731 if (--task->suspend_count > 0) {
3732 return;
3733 }
3734
3735 if (bsd_info) {
3736 workq_proc_resumed(bsd_info);
3737 }
3738
3739 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3740 thread_mtx_lock(thread);
3741 thread_release(thread);
3742 thread_mtx_unlock(thread);
3743 }
3744
3745 #if CONFIG_TASK_SUSPEND_STATS
3746 _task_mark_suspend_end(task);
3747 #endif
3748 }
3749
3750 /*
3751 * task_release:
3752 *
3753 * Same as the internal routine above, except that it must lock
3754 * and verify that the task is active.
3755 *
3756 * CONDITIONS: The caller holds a reference to the task
3757 */
3758 kern_return_t
task_release(task_t task)3759 task_release(
3760 task_t task)
3761 {
3762 if (task == TASK_NULL) {
3763 return KERN_INVALID_ARGUMENT;
3764 }
3765
3766 task_lock(task);
3767
3768 if (!task->active) {
3769 task_unlock(task);
3770
3771 return KERN_FAILURE;
3772 }
3773
3774 task_release_locked(task);
3775 task_unlock(task);
3776
3777 return KERN_SUCCESS;
3778 }
3779
static kern_return_t
task_threads_internal(
	task_t task,
	thread_act_array_t *threads_out,
	mach_msg_type_number_t *countp,
	mach_thread_flavor_t flavor)
{
	mach_msg_type_number_t actual, count, count_needed;
	thread_act_array_t thread_list;
	thread_t thread;
	unsigned int i;

	count = 0;
	thread_list = NULL;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(flavor <= THREAD_FLAVOR_INSPECT);

	/*
	 * Allocate-and-retry: the thread count can change while the task is
	 * unlocked, so loop — allocating a bigger buffer each round — until
	 * the buffer is large enough while we still hold the task lock.
	 */
	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			mach_port_array_free(thread_list, count);
			return KERN_FAILURE;
		}

		count_needed = actual = task->thread_count;
		if (count_needed <= count) {
			/* Buffer fits; exit the loop still holding the task lock. */
			break;
		}

		/* unlock the task and allocate more memory */
		task_unlock(task);

		mach_port_array_free(thread_list, count);
		count = count_needed;
		thread_list = mach_port_array_alloc(count, Z_WAITOK);

		if (thread_list == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* Still locked: snapshot the thread list, taking a ref on each thread. */
	i = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		assert(i < actual);
		thread_reference(thread);
		((thread_t *)thread_list)[i++] = thread;
	}

	count_needed = actual;

	/* can unlock task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return null pointer and deallocate memory */

		mach_port_array_free(thread_list, count);

		*threads_out = NULL;
		*countp = 0;
	} else {
		/* if we allocated too much, must copy */
		if (count_needed < count) {
			mach_port_array_t newaddr;

			newaddr = mach_port_array_alloc(count_needed, Z_WAITOK);
			if (newaddr == NULL) {
				/* Undo the per-thread refs taken above before failing. */
				for (i = 0; i < actual; ++i) {
					thread_deallocate(((thread_t *)thread_list)[i]);
				}
				mach_port_array_free(thread_list, count);
				return KERN_RESOURCE_SHORTAGE;
			}

			bcopy(thread_list, newaddr, count_needed * sizeof(thread_t));
			mach_port_array_free(thread_list, count);
			thread_list = newaddr;
		}

		/* do the conversion that Mig should handle */
		convert_thread_array_to_ports(thread_list, actual, flavor);

		*threads_out = thread_list;
		*countp = actual;
	}

	return KERN_SUCCESS;
}
3874
3875
3876 kern_return_t
task_threads_from_user(mach_port_t port,thread_act_array_t * threads_out,mach_msg_type_number_t * count)3877 task_threads_from_user(
3878 mach_port_t port,
3879 thread_act_array_t *threads_out,
3880 mach_msg_type_number_t *count)
3881 {
3882 ipc_kobject_type_t kotype;
3883 kern_return_t kr;
3884
3885 task_t task = convert_port_to_task_inspect_no_eval(port);
3886
3887 if (task == TASK_NULL) {
3888 return KERN_INVALID_ARGUMENT;
3889 }
3890
3891 kotype = ip_kotype(port);
3892
3893 switch (kotype) {
3894 case IKOT_TASK_CONTROL:
3895 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3896 break;
3897 case IKOT_TASK_READ:
3898 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
3899 break;
3900 case IKOT_TASK_INSPECT:
3901 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
3902 break;
3903 default:
3904 panic("strange kobject type");
3905 break;
3906 }
3907
3908 task_deallocate(task);
3909 return kr;
3910 }
3911
3912 #define TASK_HOLD_NORMAL 0
3913 #define TASK_HOLD_PIDSUSPEND 1
3914 #define TASK_HOLD_LEGACY 2
3915 #define TASK_HOLD_LEGACY_ALL 3
3916
/*
 * place_task_hold:
 *	Add one user-level suspension to the task. Caller holds the
 *	task lock; mode is one of the TASK_HOLD_* values.
 */
static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
	    task_pid(task),
	    task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
	    task->user_stop_count, task->user_stop_count + 1);

#if MACH_ASSERT
	/* Debug-only accounting of suspensions issued by the calling task. */
	current_task()->suspends_outstanding++;
#endif

	/* Legacy (task_suspend) holds are counted separately as well. */
	if (mode == TASK_HOLD_LEGACY) {
		task->legacy_stop_count++;
	}

#ifdef CONFIG_TASK_SUSPEND_STATS
	_task_mark_suspend_source(task);
#endif /* CONFIG_TASK_SUSPEND_STATS */

	if (task->user_stop_count++ > 0) {
		/*
		 * If the stop count was positive, the task is
		 * already stopped and we can exit.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold). We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return KERN_SUCCESS;
}
3967
/*
 * release_task_hold:
 *	Remove user-level suspension(s) from the task. Caller holds the
 *	task lock; mode is one of the TASK_HOLD_* values.
 */
static kern_return_t
release_task_hold(
	task_t task,
	int mode)
{
	boolean_t release = FALSE;

	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	if (mode == TASK_HOLD_PIDSUSPEND) {
		if (task->pidsuspended == FALSE) {
			return KERN_FAILURE;
		}
		task->pidsuspended = FALSE;
	}

	/* An active pidsuspend accounts for one of the stop counts. */
	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
		    task->user_stop_count, mode, task->legacy_stop_count);

#if MACH_ASSERT
		/*
		 * This is obviously not robust; if we suspend one task and then resume a different one,
		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
		 * or buggy suspender.
		 */
		current_task()->suspends_outstanding--;
#endif

		if (mode == TASK_HOLD_LEGACY_ALL) {
			/* Drop every remaining legacy hold at once (no-senders path). */
			if (task->legacy_stop_count >= task->user_stop_count) {
				task->user_stop_count = 0;
				release = TRUE;
			} else {
				task->user_stop_count -= task->legacy_stop_count;
			}
			task->legacy_stop_count = 0;
		} else {
			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
				task->legacy_stop_count--;
			}
			if (--task->user_stop_count == 0) {
				release = TRUE;
			}
		}
	} else {
		/* Nothing left to release (over-resume). */
		return KERN_FAILURE;
	}

	/*
	 * Release the task if necessary.
	 */
	if (release) {
		task_release_locked(task);
	}

	return KERN_SUCCESS;
}
4035
4036 boolean_t
get_task_suspended(task_t task)4037 get_task_suspended(task_t task)
4038 {
4039 return 0 != task->user_stop_count;
4040 }
4041
4042 /*
4043 * task_suspend:
4044 *
4045 * Implement an (old-fashioned) user-level suspension on a task.
4046 *
4047 * Because the user isn't expecting to have to manage a suspension
4048 * token, we'll track it for him in the kernel in the form of a naked
4049 * send right to the task's resume port. All such send rights
4050 * account for a single suspension against the task (unlike task_suspend2()
4051 * where each caller gets a unique suspension count represented by a
4052 * unique send-once right).
4053 *
4054 * Conditions:
4055 * The caller holds a reference to the task
4056 */
kern_return_t
task_suspend(
	task_t task)
{
	kern_return_t kr;
	mach_port_t port;
	mach_port_name_t name;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * place a legacy hold on the task.
	 */
	task_lock(task);
	kr = place_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Claim a send right on the task resume port, and request a no-senders
	 * notification on that port (if none outstanding).
	 */
	itk_lock(task);
	port = task->itk_resume;
	if (port == IP_NULL) {
		/* First suspender: lazily create the resume port with a send right. */
		port = ipc_kobject_alloc_port(task, IKOT_TASK_RESUME,
		    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
		task->itk_resume = port;
	} else {
		(void)ipc_kobject_make_send_nsrequest(port, task, IKOT_TASK_RESUME);
	}
	itk_unlock(task);

	/*
	 * Copyout the send right into the calling task's IPC space. It won't know it is there,
	 * but we'll look it up when calling a traditional resume. Any IPC operations that
	 * deallocate the send right will auto-release the suspension.
	 */
	if (IP_VALID(port)) {
		kr = ipc_object_copyout(current_space(), ip_to_object(port),
		    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
		    NULL, NULL, &name);
	} else {
		kr = KERN_SUCCESS;
	}
	if (kr != KERN_SUCCESS) {
		/* The hold itself remains; only the token copyout failed. */
		printf("warning: %s(%d) failed to copyout suspension "
		    "token for pid %d with error: %d\n",
		    proc_name_address(get_bsdtask_info(current_task())),
		    proc_pid(get_bsdtask_info(current_task())),
		    task_pid(task), kr);
	}

	return kr;
}
4117
4118 /*
4119 * task_resume:
4120 * Release a user hold on a task.
4121 *
4122 * Conditions:
4123 * The caller holds a reference to the task
4124 */
kern_return_t
task_resume(
	task_t task)
{
	kern_return_t kr;
	mach_port_name_t resume_port_name;
	ipc_entry_t resume_port_entry;
	ipc_space_t space = current_task()->itk_space;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/* release a legacy task hold */
	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	/* Lock ordering here: target's itk lock, then the caller's space lock. */
	itk_lock(task); /* for itk_resume */
	is_write_lock(space); /* spin lock */
	if (is_active(space) && IP_VALID(task->itk_resume) &&
	    ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
		/*
		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
		 * we are holding one less legacy hold on the task from this caller. If the release failed,
		 * go ahead and drop all the rights, as someone either already released our holds or the task
		 * is gone.
		 */
		itk_unlock(task);
		if (kr == KERN_SUCCESS) {
			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
		} else {
			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
		}
		/* space unlocked */
	} else {
		itk_unlock(task);
		is_write_unlock(space);
		if (kr == KERN_SUCCESS) {
			/* Hold released, but the caller never held a token for it. */
			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
			    proc_name_address(get_bsdtask_info(current_task())), proc_pid(get_bsdtask_info(current_task())),
			    task_pid(task));
		}
	}

	return kr;
}
4172
4173 /*
4174 * Suspend a task that is already protected by a held lock.
4175 * Making/holding a token/reference/port is the caller's responsibility.
4176 */
4177 kern_return_t
task_suspend_internal_locked(task_t task)4178 task_suspend_internal_locked(task_t task)
4179 {
4180 if (task == TASK_NULL || task == kernel_task) {
4181 return KERN_INVALID_ARGUMENT;
4182 }
4183
4184 return place_task_hold(task, TASK_HOLD_NORMAL);
4185 }
4186
4187 /*
4188 * Suspend a task.
4189 * Making/holding a token/reference/port is the caller's responsibility.
4190 */
4191 kern_return_t
task_suspend_internal(task_t task)4192 task_suspend_internal(task_t task)
4193 {
4194 kern_return_t kr;
4195
4196 if (task == TASK_NULL || task == kernel_task) {
4197 return KERN_INVALID_ARGUMENT;
4198 }
4199
4200 task_lock(task);
4201 kr = task_suspend_internal_locked(task);
4202 task_unlock(task);
4203 return kr;
4204 }
4205
4206 /*
4207 * Suspend the target task, and return a suspension token. The token
4208 * represents a reference on the suspended task.
4209 */
4210 static kern_return_t
task_suspend2_grp(task_t task,task_suspension_token_t * suspend_token,task_grp_t grp)4211 task_suspend2_grp(
4212 task_t task,
4213 task_suspension_token_t *suspend_token,
4214 task_grp_t grp)
4215 {
4216 kern_return_t kr;
4217
4218 kr = task_suspend_internal(task);
4219 if (kr != KERN_SUCCESS) {
4220 *suspend_token = TASK_NULL;
4221 return kr;
4222 }
4223
4224 /*
4225 * Take a reference on the target task and return that to the caller
4226 * as a "suspension token," which can be converted into an SO right to
4227 * the now-suspended task's resume port.
4228 */
4229 task_reference_grp(task, grp);
4230 *suspend_token = task;
4231
4232 return KERN_SUCCESS;
4233 }
4234
4235 kern_return_t
task_suspend2_mig(task_t task,task_suspension_token_t * suspend_token)4236 task_suspend2_mig(
4237 task_t task,
4238 task_suspension_token_t *suspend_token)
4239 {
4240 return task_suspend2_grp(task, suspend_token, TASK_GRP_MIG);
4241 }
4242
4243 kern_return_t
task_suspend2_external(task_t task,task_suspension_token_t * suspend_token)4244 task_suspend2_external(
4245 task_t task,
4246 task_suspension_token_t *suspend_token)
4247 {
4248 return task_suspend2_grp(task, suspend_token, TASK_GRP_EXTERNAL);
4249 }
4250
4251 /*
4252 * Resume a task that is already protected by a held lock.
4253 * (reference/token/port management is caller's responsibility).
4254 */
4255 kern_return_t
task_resume_internal_locked(task_suspension_token_t task)4256 task_resume_internal_locked(
4257 task_suspension_token_t task)
4258 {
4259 if (task == TASK_NULL || task == kernel_task) {
4260 return KERN_INVALID_ARGUMENT;
4261 }
4262
4263 return release_task_hold(task, TASK_HOLD_NORMAL);
4264 }
4265
4266 /*
4267 * Resume a task.
4268 * (reference/token/port management is caller's responsibility).
4269 */
4270 kern_return_t
task_resume_internal(task_suspension_token_t task)4271 task_resume_internal(
4272 task_suspension_token_t task)
4273 {
4274 kern_return_t kr;
4275
4276 if (task == TASK_NULL || task == kernel_task) {
4277 return KERN_INVALID_ARGUMENT;
4278 }
4279
4280 task_lock(task);
4281 kr = task_resume_internal_locked(task);
4282 task_unlock(task);
4283 return kr;
4284 }
4285
4286 /*
4287 * Resume the task using a suspension token. Consumes the token's ref.
4288 */
4289 static kern_return_t
task_resume2_grp(task_suspension_token_t task,task_grp_t grp)4290 task_resume2_grp(
4291 task_suspension_token_t task,
4292 task_grp_t grp)
4293 {
4294 kern_return_t kr;
4295
4296 kr = task_resume_internal(task);
4297 task_suspension_token_deallocate_grp(task, grp);
4298
4299 return kr;
4300 }
4301
4302 kern_return_t
task_resume2_mig(task_suspension_token_t task)4303 task_resume2_mig(
4304 task_suspension_token_t task)
4305 {
4306 return task_resume2_grp(task, TASK_GRP_MIG);
4307 }
4308
4309 kern_return_t
task_resume2_external(task_suspension_token_t task)4310 task_resume2_external(
4311 task_suspension_token_t task)
4312 {
4313 return task_resume2_grp(task, TASK_GRP_EXTERNAL);
4314 }
4315
/*
 * task_suspension_no_senders:
 *	No-senders notification on a task's resume port: all legacy
 *	suspension tokens (send rights) for the task have been dropped.
 */
static void
task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	task_t task = convert_port_to_task_suspension_token(port);
	kern_return_t kr;

	if (task == TASK_NULL) {
		return;
	}

	if (task == kernel_task) {
		task_suspension_token_deallocate(task);
		return;
	}

	task_lock(task);

	/*
	 * NOTE(review): KERN_FAILURE here appears to mean the no-senders
	 * request could not be re-armed (no new send rights were made since
	 * the notification), so the last token is truly gone — confirm
	 * against ipc_kobject_nsrequest()'s contract.
	 */
	kr = ipc_kobject_nsrequest(port, mscount, NULL);
	if (kr == KERN_FAILURE) {
		/* release all the [remaining] outstanding legacy holds */
		release_task_hold(task, TASK_HOLD_LEGACY_ALL);
	}

	task_unlock(task);

	task_suspension_token_deallocate(task); /* drop token reference */
}
4343
4344 /*
4345 * Fires when a send once made
4346 * by convert_task_suspension_token_to_port() dies.
4347 */
void
task_suspension_send_once(ipc_port_t port)
{
	task_t task = convert_port_to_task_suspension_token(port);

	if (task == TASK_NULL || task == kernel_task) {
		return; /* nothing to do */
	}

	/* release the hold held by this specific send-once right */
	task_lock(task);
	/* Best effort: the result is deliberately ignored (nothing to report to). */
	release_task_hold(task, TASK_HOLD_NORMAL);
	task_unlock(task);

	task_suspension_token_deallocate(task); /* drop token reference */
}
4364
4365 static kern_return_t
task_pidsuspend_locked(task_t task)4366 task_pidsuspend_locked(task_t task)
4367 {
4368 kern_return_t kr;
4369
4370 if (task->pidsuspended) {
4371 kr = KERN_FAILURE;
4372 goto out;
4373 }
4374
4375 task->pidsuspended = TRUE;
4376
4377 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4378 if (kr != KERN_SUCCESS) {
4379 task->pidsuspended = FALSE;
4380 }
4381 out:
4382 return kr;
4383 }
4384
4385
4386 /*
4387 * task_pidsuspend:
4388 *
4389 * Suspends a task by placing a hold on its threads.
4390 *
4391 * Conditions:
4392 * The caller holds a reference to the task
4393 */
kern_return_t
task_pidsuspend(
	task_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	kr = task_pidsuspend_locked(task);

	task_unlock(task);

	/*
	 * NOTE(review): message_app_suspended is read after the task lock is
	 * dropped — presumably it is stable for the task's lifetime; confirm.
	 */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

	return kr;
}
4416
4417 /*
4418 * task_pidresume:
4419 * Resumes a previously suspended task.
4420 *
4421 * Conditions:
4422 * The caller holds a reference to the task
4423 */
kern_return_t
task_pidresume(
	task_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

#if CONFIG_FREEZE

	/* Wait out any in-flight freeze/thaw transition, then claim it. */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	task->changing_freeze_state = TRUE;
#endif

	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);

	task_unlock(task);

	/*
	 * NOTE(review): message_app_suspended is read without the task lock —
	 * presumably it is stable for the task's lifetime; confirm.
	 */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		iokit_task_app_suspended_changed(task);
	}

#if CONFIG_FREEZE

	task_lock(task);

	if (kr == KERN_SUCCESS) {
		/* A successful pidresume also thaws the task. */
		task->frozen = FALSE;
	}
	/* Hand the freeze-state transition back and wake any waiters. */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);
#endif

	return kr;
}
4471
4472 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4473
4474 /*
4475 * task_add_turnstile_watchports:
4476 * Setup watchports to boost the main thread of the task.
4477 *
4478 * Arguments:
4479 * task: task being spawned
4480 * thread: main thread of task
4481 * portwatch_ports: array of watchports
4482 * portwatch_count: number of watchports
4483 *
4484 * Conditions:
4485 * Nothing locked.
4486 */
void
task_add_turnstile_watchports(
	task_t task,
	thread_t thread,
	ipc_port_t *portwatch_ports,
	uint32_t portwatch_count)
{
	struct task_watchports *watchports = NULL;
	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
	os_ref_count_t refs;

	/* Check if the task has terminated */
	if (!task->active) {
		return;
	}

	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);

	watchports = task_watchports_alloc_init(task, thread, portwatch_count);

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Setup watchports to boost the main thread */
	refs = task_add_turnstile_watchports_locked(task,
	    watchports, previous_elem_array, portwatch_ports,
	    portwatch_count);

	/* Drop the space lock */
	is_write_unlock(task->itk_space);

	/* Zero refs means no port was attached: free the unused struct. */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}

	/* Drop the ref on previous_elem_array */
	/* The array is NULL-terminated by construction (dense prefix). */
	for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
		task_watchport_elem_deallocate(previous_elem_array[i]);
	}
}
4527
4528 /*
4529 * task_remove_turnstile_watchports:
4530 * Clear all turnstile boost on the task from watchports.
4531 *
4532 * Arguments:
4533 * task: task being terminated
4534 *
4535 * Conditions:
4536 * Nothing locked.
4537 */
void
task_remove_turnstile_watchports(
	task_t task)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	struct task_watchports *watchports = NULL;
	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
	uint32_t portwatch_count;

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Check if watchport boost exist */
	if (task->watchports == NULL) {
		is_write_unlock(task->itk_space);
		return;
	}
	watchports = task->watchports;
	portwatch_count = watchports->tw_elem_array_count;

	refs = task_remove_turnstile_watchports_locked(task, watchports,
	    port_freelist);

	is_write_unlock(task->itk_space);

	/* Drop all the port references */
	/* port_freelist is a dense, NULL-terminated prefix of ports with refs. */
	for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
		ip_release(port_freelist[i]);
	}

	/* Clear the task and thread references for task_watchport */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4573
4574 /*
4575 * task_transfer_turnstile_watchports:
4576 * Transfer all watchport turnstile boost from old task to new task.
4577 *
4578 * Arguments:
4579 * old_task: task calling exec
4580 * new_task: new exec'ed task
4581 * thread: main thread of new task
4582 *
4583 * Conditions:
4584 * Nothing locked.
4585 */
void
task_transfer_turnstile_watchports(
	task_t old_task,
	task_t new_task,
	thread_t new_thread)
{
	struct task_watchports *old_watchports = NULL;
	struct task_watchports *new_watchports = NULL;
	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
	uint32_t portwatch_count;

	if (old_task->watchports == NULL || !new_task->active) {
		return;
	}

	/* Get the watch port count from the old task */
	is_write_lock(old_task->itk_space);
	if (old_task->watchports == NULL) {
		is_write_unlock(old_task->itk_space);
		return;
	}

	portwatch_count = old_task->watchports->tw_elem_array_count;
	is_write_unlock(old_task->itk_space);

	/* Allocation happens unlocked; re-validate after relocking below. */
	new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);

	/* Lock the ipc space for old task */
	is_write_lock(old_task->itk_space);

	/* Lock the ipc space for new task */
	is_write_lock(new_task->itk_space);

	/* Check if watchport boost exist */
	if (old_task->watchports == NULL || !new_task->active) {
		is_write_unlock(new_task->itk_space);
		is_write_unlock(old_task->itk_space);
		/* Undo the alloc_init reference and free the unused struct. */
		(void)task_watchports_release(new_watchports);
		task_watchports_deallocate(new_watchports);
		return;
	}

	old_watchports = old_task->watchports;
	assert(portwatch_count == old_task->watchports->tw_elem_array_count);

	/* Setup new task watchports */
	new_task->watchports = new_watchports;

	for (uint32_t i = 0; i < portwatch_count; i++) {
		ipc_port_t port = old_watchports->tw_elem[i].twe_port;

		if (port == NULL) {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);

		task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);

		/* Swap the port's element old -> new only if it still holds the old one. */
		if (ipc_port_replace_watchport_elem_conditional_locked(port,
		    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&old_watchports->tw_elem[i]);

			task_watchports_retain(new_watchports);
			old_refs = task_watchports_release(old_watchports);

			/* Check if all ports are cleaned */
			if (old_refs == 0) {
				old_task->watchports = NULL;
			}
		} else {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
		}
		/* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
	}

	/* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
	new_refs = task_watchports_release(new_watchports);
	if (new_refs == 0) {
		new_task->watchports = NULL;
	}

	is_write_unlock(new_task->itk_space);
	is_write_unlock(old_task->itk_space);

	/* Clear the task and thread references for old_watchport */
	if (old_refs == 0) {
		task_watchports_deallocate(old_watchports);
	}

	/* Clear the task and thread references for new_watchport */
	if (new_refs == 0) {
		task_watchports_deallocate(new_watchports);
	}
}
4684
4685 /*
4686 * task_add_turnstile_watchports_locked:
4687 * Setup watchports to boost the main thread of the task.
4688 *
4689 * Arguments:
4690 * task: task to boost
4691 * watchports: watchport structure to be attached to the task
4692 * previous_elem_array: an array of old watchport_elem to be returned to caller
4693 * portwatch_ports: array of watchports
4694 * portwatch_count: number of watchports
4695 *
4696 * Conditions:
4697 * ipc space of the task locked.
4698 * returns array of old watchport_elem in previous_elem_array
4699 */
static os_ref_count_t
task_add_turnstile_watchports_locked(
	task_t task,
	struct task_watchports *watchports,
	struct task_watchport_elem **previous_elem_array,
	ipc_port_t *portwatch_ports,
	uint32_t portwatch_count)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* Check if the task is still active */
	if (!task->active) {
		refs = task_watchports_release(watchports);
		return refs;
	}

	assert(task->watchports == NULL);
	task->watchports = watchports;

	/* i walks the input ports; j compacts displaced elems into a dense prefix. */
	for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
		ipc_port_t port = portwatch_ports[i];

		task_watchport_elem_init(&watchports->tw_elem[i], task, port);
		if (port == NULL) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}

		ip_mq_lock(port);

		/* Check if port is in valid state to be setup as watchport */
		if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
		    &previous_elem_array[j]) != KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}
		/* port unlocked on return */

		/* Successful attach: take a port ref and a watchports ref. */
		ip_reference(port);
		task_watchports_retain(watchports);
		if (previous_elem_array[j] != NULL) {
			j++;
		}
	}

	/* Drop the reference on task_watchport struct returned by os_ref_init */
	refs = task_watchports_release(watchports);
	if (refs == 0) {
		task->watchports = NULL;
	}

	return refs;
}
4753
4754 /*
4755 * task_remove_turnstile_watchports_locked:
4756 * Clear all turnstile boost on the task from watchports.
4757 *
4758 * Arguments:
4759 * task: task to remove watchports from
4760 * watchports: watchports structure for the task
4761 * port_freelist: array of ports returned with ref to caller
4762 *
4763 *
4764 * Conditions:
4765 * ipc space of the task locked.
4766 * array of ports with refs are returned in port_freelist
4767 */
static os_ref_count_t
task_remove_turnstile_watchports_locked(
	task_t task,
	struct task_watchports *watchports,
	ipc_port_t *port_freelist)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* j packs detached ports densely into port_freelist for the caller to release */
	for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
		ipc_port_t port = watchports->tw_elem[i].twe_port;
		if (port == NULL) {
			/* Slot was never attached (see task_add_turnstile_watchports_locked) */
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);
		if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
		    &watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			/* Hand the elem's port ref back to the caller via the freelist */
			port_freelist[j++] = port;
			refs = task_watchports_release(watchports);

			/* Check if all ports are cleaned */
			if (refs == 0) {
				task->watchports = NULL;
				break;
			}
		}
		/* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
	}
	return refs;
}
4800
4801 /*
4802 * task_watchports_alloc_init:
4803 * Allocate and initialize task watchport struct.
4804 *
4805 * Conditions:
4806 * Nothing locked.
4807 */
4808 static struct task_watchports *
task_watchports_alloc_init(task_t task,thread_t thread,uint32_t count)4809 task_watchports_alloc_init(
4810 task_t task,
4811 thread_t thread,
4812 uint32_t count)
4813 {
4814 struct task_watchports *watchports = kalloc_type(struct task_watchports,
4815 struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4816
4817 task_reference(task);
4818 thread_reference(thread);
4819 watchports->tw_task = task;
4820 watchports->tw_thread = thread;
4821 watchports->tw_elem_array_count = count;
4822 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4823
4824 return watchports;
4825 }
4826
4827 /*
4828 * task_watchports_deallocate:
4829 * Deallocate task watchport struct.
4830 *
4831 * Conditions:
4832 * Nothing locked.
4833 */
4834 static void
task_watchports_deallocate(struct task_watchports * watchports)4835 task_watchports_deallocate(
4836 struct task_watchports *watchports)
4837 {
4838 uint32_t portwatch_count = watchports->tw_elem_array_count;
4839
4840 task_deallocate(watchports->tw_task);
4841 thread_deallocate(watchports->tw_thread);
4842 kfree_type(struct task_watchports, struct task_watchport_elem,
4843 portwatch_count, watchports);
4844 }
4845
4846 /*
4847 * task_watchport_elem_deallocate:
4848 * Deallocate task watchport element and release its ref on task_watchport.
4849 *
4850 * Conditions:
4851 * Nothing locked.
4852 */
void
task_watchport_elem_deallocate(
	struct task_watchport_elem *watchport_elem)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	task_t task = watchport_elem->twe_task;
	struct task_watchports *watchports = NULL;
	ipc_port_t port = NULL;

	assert(task != NULL);

	/* Take the space lock to modify the elememt */
	is_write_lock(task->itk_space);

	watchports = task->watchports;
	assert(watchports != NULL);

	port = watchport_elem->twe_port;
	assert(port != NULL);

	/* Drop the elem's ref on the watchports struct (taken at attach time) */
	task_watchport_elem_clear(watchport_elem);
	refs = task_watchports_release(watchports);

	if (refs == 0) {
		/* Last elem gone: unpublish before dropping the space lock */
		task->watchports = NULL;
	}

	is_write_unlock(task->itk_space);

	/* Release the port ref outside the space lock */
	ip_release(port);
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4887
4888 /*
4889 * task_has_watchports:
4890 * Return TRUE if task has watchport boosts.
4891 *
4892 * Conditions:
4893 * Nothing locked.
4894 */
4895 boolean_t
task_has_watchports(task_t task)4896 task_has_watchports(task_t task)
4897 {
4898 return task->watchports != NULL;
4899 }
4900
4901 #if DEVELOPMENT || DEBUG
4902
4903 extern void IOSleep(int);
4904
kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int n;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * this function is used to strip all of the mappings from
	 * the pmap for the specified task to force the task to
	 * re-fault all of the pages it is actively using... this
	 * allows us to approximate the true working set of the
	 * specified task. We only engage if at least 1 of the
	 * threads in the task is runnable, but we want to continuously
	 * sweep (at least for a while - I've arbitrarily set the limit at
	 * 100 sweeps to be re-looked at as we gain experience) to get a better
	 * view into what areas within a page are being visited (as opposed to only
	 * seeing the first fault of a page after the task becomes
	 * runnable)... in the future I may
	 * try to block until awakened by a thread in this task
	 * being made runnable, but for now we'll periodically poll from the
	 * user level debug tool driving the sysctl
	 */
	for (n = 0; n < 100; n++) {
		thread_t thread;
		boolean_t runnable;
		boolean_t do_unnest;
		int page_count;

		runnable = FALSE;
		do_unnest = FALSE;

		task_lock(task);

		/* Any runnable thread is enough to justify a sweep */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}
		/* Count each invocation of this routine once, on the first sweep */
		if (n == 0) {
			task->task_disconnected_count++;
		}

		/* Unnest shared regions at most once per task lifetime */
		if (task->task_unnested == FALSE) {
			if (runnable == TRUE) {
				task->task_unnested = TRUE;
				do_unnest = TRUE;
			}
		}
		task_unlock(task);

		if (runnable == FALSE) {
			break;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
		    task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
		    task, page_count, 0, 0, 0);

		/* Yield briefly every 5 sweeps so we don't monopolize the CPU */
		if ((n % 5) == 4) {
			IOSleep(1);
		}
	}
	return KERN_SUCCESS;
}
4977
4978 #endif
4979
4980
4981 #if CONFIG_FREEZE
4982
4983 /*
4984 * task_freeze:
4985 *
4986 * Freeze a task.
4987 *
4988 * Conditions:
4989 * The caller holds a reference to the task
4990 */
4991 extern void vm_wake_compactor_swapper(void);
4992 extern struct freezer_context freezer_context_global;
4993
kern_return_t
task_freeze(
	task_t task,
	uint32_t *purgeable_count,
	uint32_t *wired_count,
	uint32_t *clean_count,
	uint32_t *dirty_count,
	uint32_t dirty_budget,
	uint32_t *shared_count,
	int *freezer_error_code,
	boolean_t eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Only one freeze/thaw transition may be in flight per task;
	 * sleep until any concurrent state change completes.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (task->frozen) {
		/* Already frozen: nothing to do */
		task_unlock(task);
		return KERN_FAILURE;
	}
	task->changing_freeze_state = TRUE;

	/* Publish the task being frozen for the compressor/swap path */
	freezer_context_global.freezer_ctx_task = task;

	task_unlock(task);

	/* Walk the map and compress/evaluate; task lock must be dropped here */
	kr = vm_map_freeze(task,
	    purgeable_count,
	    wired_count,
	    clean_count,
	    dirty_count,
	    dirty_budget,
	    shared_count,
	    freezer_error_code,
	    eval_only);

	task_lock(task);

	/* Only a real (non-evaluation) successful freeze marks the task frozen */
	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;

		freezer_context_global.freezer_ctx_task = NULL;
		freezer_context_global.freezer_ctx_uncompressed_pages = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64); /*used to track pageouts*/
		}

		freezer_context_global.freezer_ctx_swapped_bytes = 0;
	}

	/* Allow any waiters blocked on changing_freeze_state to proceed */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (kr == KERN_SUCCESS) &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kind of "per-task packed c_segs"
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&vm_swapout_thread);
	}

	return kr;
}
5082
5083 /*
5084 * task_thaw:
5085 *
5086 * Thaw a currently frozen task.
5087 *
5088 * Conditions:
5089 * The caller holds a reference to the task
5090 */
5091 kern_return_t
task_thaw(task_t task)5092 task_thaw(
5093 task_t task)
5094 {
5095 if (task == TASK_NULL || task == kernel_task) {
5096 return KERN_INVALID_ARGUMENT;
5097 }
5098
5099 task_lock(task);
5100
5101 while (task->changing_freeze_state) {
5102 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
5103 task_unlock(task);
5104 thread_block(THREAD_CONTINUE_NULL);
5105
5106 task_lock(task);
5107 }
5108 if (!task->frozen) {
5109 task_unlock(task);
5110 return KERN_FAILURE;
5111 }
5112 task->frozen = FALSE;
5113
5114 task_unlock(task);
5115
5116 return KERN_SUCCESS;
5117 }
5118
5119 void
task_update_frozen_to_swap_acct(task_t task,int64_t amount,freezer_acct_op_t op)5120 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
5121 {
5122 /*
5123 * We don't assert that the task lock is held because we call this
5124 * routine from the decompression path and we won't be holding the
5125 * task lock. However, since we are in the context of the task we are
5126 * safe.
5127 * In the case of the task_freeze path, we call it from behind the task
5128 * lock but we don't need to because we have a reference on the proc
5129 * being frozen.
5130 */
5131
5132 assert(task);
5133 if (amount == 0) {
5134 return;
5135 }
5136
5137 if (op == CREDIT_TO_SWAP) {
5138 ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5139 } else if (op == DEBIT_FROM_SWAP) {
5140 ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5141 } else {
5142 panic("task_update_frozen_to_swap_acct: Invalid ledger op");
5143 }
5144 }
5145 #endif /* CONFIG_FREEZE */
5146
5147 kern_return_t
task_set_security_tokens(task_t task,security_token_t sec_token,audit_token_t audit_token,host_priv_t host_priv)5148 task_set_security_tokens(
5149 task_t task,
5150 security_token_t sec_token,
5151 audit_token_t audit_token,
5152 host_priv_t host_priv)
5153 {
5154 ipc_port_t host_port = IP_NULL;
5155 kern_return_t kr;
5156
5157 if (task == TASK_NULL) {
5158 return KERN_INVALID_ARGUMENT;
5159 }
5160
5161 task_lock(task);
5162 task_set_tokens(task, &sec_token, &audit_token);
5163 task_unlock(task);
5164
5165 if (host_priv != HOST_PRIV_NULL) {
5166 kr = host_get_host_priv_port(host_priv, &host_port);
5167 } else {
5168 kr = host_get_host_port(host_priv_self(), &host_port);
5169 }
5170 assert(kr == KERN_SUCCESS);
5171
5172 kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
5173 return kr;
5174 }
5175
/* Deprecated stub: tracing memory via this MIG routine is no longer supported. */
kern_return_t
task_send_trace_memory(
	__unused task_t target_task,
	__unused uint32_t pid,
	__unused uint64_t uniqueid)
{
	return KERN_INVALID_ARGUMENT;
}
5184
5185 /*
5186 * This routine was added, pretty much exclusively, for registering the
5187 * RPC glue vector for in-kernel short circuited tasks. Rather than
5188 * removing it completely, I have only disabled that feature (which was
5189 * the only feature at the time). It just appears that we are going to
5190 * want to add some user data to tasks in the future (i.e. bsd info,
5191 * task names, etc...), so I left it in the formal task interface.
5192 */
5193 kern_return_t
task_set_info(task_t task,task_flavor_t flavor,__unused task_info_t task_info_in,__unused mach_msg_type_number_t task_info_count)5194 task_set_info(
5195 task_t task,
5196 task_flavor_t flavor,
5197 __unused task_info_t task_info_in, /* pointer to IN array */
5198 __unused mach_msg_type_number_t task_info_count)
5199 {
5200 if (task == TASK_NULL) {
5201 return KERN_INVALID_ARGUMENT;
5202 }
5203 switch (flavor) {
5204 #if CONFIG_ATM
5205 case TASK_TRACE_MEMORY_INFO:
5206 return KERN_NOT_SUPPORTED;
5207 #endif // CONFIG_ATM
5208 default:
5209 return KERN_INVALID_ARGUMENT;
5210 }
5211 }
5212
5213 static void
_task_fill_times(task_t task,time_value_t * user_time,time_value_t * sys_time)5214 _task_fill_times(task_t task, time_value_t *user_time, time_value_t *sys_time)
5215 {
5216 clock_sec_t sec;
5217 clock_usec_t usec;
5218
5219 struct recount_times_mach times = recount_task_terminated_times(task);
5220 absolutetime_to_microtime(times.rtm_user, &sec, &usec);
5221 user_time->seconds = (typeof(user_time->seconds))sec;
5222 user_time->microseconds = usec;
5223 absolutetime_to_microtime(times.rtm_system, &sec, &usec);
5224 sys_time->seconds = (typeof(sys_time->seconds))sec;
5225 sys_time->microseconds = usec;
5226 }
5227
5228 int radar_20146450 = 1;
5229 kern_return_t
task_info(task_t task,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)5230 task_info(
5231 task_t task,
5232 task_flavor_t flavor,
5233 task_info_t task_info_out,
5234 mach_msg_type_number_t *task_info_count)
5235 {
5236 kern_return_t error = KERN_SUCCESS;
5237 mach_msg_type_number_t original_task_info_count;
5238 bool is_kernel_task = (task == kernel_task);
5239
5240 if (task == TASK_NULL) {
5241 return KERN_INVALID_ARGUMENT;
5242 }
5243
5244 original_task_info_count = *task_info_count;
5245 task_lock(task);
5246
5247 if (task != current_task() && !task->active) {
5248 task_unlock(task);
5249 return KERN_INVALID_ARGUMENT;
5250 }
5251
5252
5253 switch (flavor) {
5254 case TASK_BASIC_INFO_32:
5255 case TASK_BASIC2_INFO_32:
5256 #if defined(__arm64__)
5257 case TASK_BASIC_INFO_64:
5258 #endif
5259 {
5260 task_basic_info_32_t basic_info;
5261 ledger_amount_t tmp;
5262
5263 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
5264 error = KERN_INVALID_ARGUMENT;
5265 break;
5266 }
5267
5268 basic_info = (task_basic_info_32_t)task_info_out;
5269
5270 basic_info->virtual_size = (typeof(basic_info->virtual_size))
5271 vm_map_adjusted_size(is_kernel_task ? kernel_map : task->map);
5272 if (flavor == TASK_BASIC2_INFO_32) {
5273 /*
5274 * The "BASIC2" flavor gets the maximum resident
5275 * size instead of the current resident size...
5276 */
5277 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
5278 } else {
5279 ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
5280 }
5281 basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
5282
5283 _task_fill_times(task, &basic_info->user_time,
5284 &basic_info->system_time);
5285
5286 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5287 basic_info->suspend_count = task->user_stop_count;
5288
5289 *task_info_count = TASK_BASIC_INFO_32_COUNT;
5290 break;
5291 }
5292
5293 #if defined(__arm64__)
5294 case TASK_BASIC_INFO_64_2:
5295 {
5296 task_basic_info_64_2_t basic_info;
5297
5298 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
5299 error = KERN_INVALID_ARGUMENT;
5300 break;
5301 }
5302
5303 basic_info = (task_basic_info_64_2_t)task_info_out;
5304
5305 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5306 kernel_map : task->map);
5307 ledger_get_balance(task->ledger, task_ledgers.phys_mem,
5308 (ledger_amount_t *)&basic_info->resident_size);
5309 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5310 basic_info->suspend_count = task->user_stop_count;
5311 _task_fill_times(task, &basic_info->user_time,
5312 &basic_info->system_time);
5313
5314 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
5315 break;
5316 }
5317
5318 #else /* defined(__arm64__) */
5319 case TASK_BASIC_INFO_64:
5320 {
5321 task_basic_info_64_t basic_info;
5322
5323 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
5324 error = KERN_INVALID_ARGUMENT;
5325 break;
5326 }
5327
5328 basic_info = (task_basic_info_64_t)task_info_out;
5329
5330 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5331 kernel_map : task->map);
5332 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
5333 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5334 basic_info->suspend_count = task->user_stop_count;
5335 _task_fill_times(task, &basic_info->user_time,
5336 &basic_info->system_time);
5337
5338 *task_info_count = TASK_BASIC_INFO_64_COUNT;
5339 break;
5340 }
5341 #endif /* defined(__arm64__) */
5342
5343 case MACH_TASK_BASIC_INFO:
5344 {
5345 mach_task_basic_info_t basic_info;
5346
5347 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5348 error = KERN_INVALID_ARGUMENT;
5349 break;
5350 }
5351
5352 basic_info = (mach_task_basic_info_t)task_info_out;
5353
5354 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5355 kernel_map : task->map);
5356 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
5357 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
5358 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5359 basic_info->suspend_count = task->user_stop_count;
5360 _task_fill_times(task, &basic_info->user_time,
5361 &basic_info->system_time);
5362
5363 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5364 break;
5365 }
5366
5367 case TASK_THREAD_TIMES_INFO:
5368 {
5369 task_thread_times_info_t times_info;
5370 thread_t thread;
5371
5372 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5373 error = KERN_INVALID_ARGUMENT;
5374 break;
5375 }
5376
5377 times_info = (task_thread_times_info_t)task_info_out;
5378 times_info->user_time = (time_value_t){ 0 };
5379 times_info->system_time = (time_value_t){ 0 };
5380
5381 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5382 if ((thread->options & TH_OPT_IDLE_THREAD) == 0) {
5383 time_value_t user_time, system_time;
5384
5385 thread_read_times(thread, &user_time, &system_time, NULL);
5386 time_value_add(×_info->user_time, &user_time);
5387 time_value_add(×_info->system_time, &system_time);
5388 }
5389 }
5390
5391 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5392 break;
5393 }
5394
5395 case TASK_ABSOLUTETIME_INFO:
5396 {
5397 task_absolutetime_info_t info;
5398
5399 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5400 error = KERN_INVALID_ARGUMENT;
5401 break;
5402 }
5403
5404 info = (task_absolutetime_info_t)task_info_out;
5405
5406 struct recount_times_mach term_times =
5407 recount_task_terminated_times(task);
5408 struct recount_times_mach total_times = recount_task_times(task);
5409
5410 info->total_user = total_times.rtm_user;
5411 info->total_system = total_times.rtm_system;
5412 info->threads_user = total_times.rtm_user - term_times.rtm_user;
5413 info->threads_system += total_times.rtm_system - term_times.rtm_system;
5414
5415 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5416 break;
5417 }
5418
5419 case TASK_DYLD_INFO:
5420 {
5421 task_dyld_info_t info;
5422
5423 /*
5424 * We added the format field to TASK_DYLD_INFO output. For
5425 * temporary backward compatibility, accept the fact that
5426 * clients may ask for the old version - distinquished by the
5427 * size of the expected result structure.
5428 */
5429 #define TASK_LEGACY_DYLD_INFO_COUNT \
5430 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5431
5432 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5433 error = KERN_INVALID_ARGUMENT;
5434 break;
5435 }
5436
5437 info = (task_dyld_info_t)task_info_out;
5438 info->all_image_info_addr = task->all_image_info_addr;
5439 info->all_image_info_size = task->all_image_info_size;
5440
5441 /* only set format on output for those expecting it */
5442 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5443 info->all_image_info_format = task_has_64Bit_addr(task) ?
5444 TASK_DYLD_ALL_IMAGE_INFO_64 :
5445 TASK_DYLD_ALL_IMAGE_INFO_32;
5446 *task_info_count = TASK_DYLD_INFO_COUNT;
5447 } else {
5448 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5449 }
5450 break;
5451 }
5452
5453 case TASK_EXTMOD_INFO:
5454 {
5455 task_extmod_info_t info;
5456 void *p;
5457
5458 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5459 error = KERN_INVALID_ARGUMENT;
5460 break;
5461 }
5462
5463 info = (task_extmod_info_t)task_info_out;
5464
5465 p = get_bsdtask_info(task);
5466 if (p) {
5467 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5468 } else {
5469 bzero(info->task_uuid, sizeof(info->task_uuid));
5470 }
5471 info->extmod_statistics = task->extmod_statistics;
5472 *task_info_count = TASK_EXTMOD_INFO_COUNT;
5473
5474 break;
5475 }
5476
5477 case TASK_KERNELMEMORY_INFO:
5478 {
5479 task_kernelmemory_info_t tkm_info;
5480 ledger_amount_t credit, debit;
5481
5482 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5483 error = KERN_INVALID_ARGUMENT;
5484 break;
5485 }
5486
5487 tkm_info = (task_kernelmemory_info_t) task_info_out;
5488 tkm_info->total_palloc = 0;
5489 tkm_info->total_pfree = 0;
5490 tkm_info->total_salloc = 0;
5491 tkm_info->total_sfree = 0;
5492
5493 if (task == kernel_task) {
5494 /*
5495 * All shared allocs/frees from other tasks count against
5496 * the kernel private memory usage. If we are looking up
5497 * info for the kernel task, gather from everywhere.
5498 */
5499 task_unlock(task);
5500
5501 /* start by accounting for all the terminated tasks against the kernel */
5502 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5503 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5504
5505 /* count all other task/thread shared alloc/free against the kernel */
5506 lck_mtx_lock(&tasks_threads_lock);
5507
5508 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5509 queue_iterate(&tasks, task, task_t, tasks) {
5510 if (task == kernel_task) {
5511 if (ledger_get_entries(task->ledger,
5512 task_ledgers.tkm_private, &credit,
5513 &debit) == KERN_SUCCESS) {
5514 tkm_info->total_palloc += credit;
5515 tkm_info->total_pfree += debit;
5516 }
5517 }
5518 if (!ledger_get_entries(task->ledger,
5519 task_ledgers.tkm_shared, &credit, &debit)) {
5520 tkm_info->total_palloc += credit;
5521 tkm_info->total_pfree += debit;
5522 }
5523 }
5524 lck_mtx_unlock(&tasks_threads_lock);
5525 } else {
5526 if (!ledger_get_entries(task->ledger,
5527 task_ledgers.tkm_private, &credit, &debit)) {
5528 tkm_info->total_palloc = credit;
5529 tkm_info->total_pfree = debit;
5530 }
5531 if (!ledger_get_entries(task->ledger,
5532 task_ledgers.tkm_shared, &credit, &debit)) {
5533 tkm_info->total_salloc = credit;
5534 tkm_info->total_sfree = debit;
5535 }
5536 task_unlock(task);
5537 }
5538
5539 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5540 return KERN_SUCCESS;
5541 }
5542
5543 /* OBSOLETE */
5544 case TASK_SCHED_FIFO_INFO:
5545 {
5546 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5547 error = KERN_INVALID_ARGUMENT;
5548 break;
5549 }
5550
5551 error = KERN_INVALID_POLICY;
5552 break;
5553 }
5554
5555 /* OBSOLETE */
5556 case TASK_SCHED_RR_INFO:
5557 {
5558 policy_rr_base_t rr_base;
5559 uint32_t quantum_time;
5560 uint64_t quantum_ns;
5561
5562 if (*task_info_count < POLICY_RR_BASE_COUNT) {
5563 error = KERN_INVALID_ARGUMENT;
5564 break;
5565 }
5566
5567 rr_base = (policy_rr_base_t) task_info_out;
5568
5569 if (task != kernel_task) {
5570 error = KERN_INVALID_POLICY;
5571 break;
5572 }
5573
5574 rr_base->base_priority = task->priority;
5575
5576 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5577 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5578
5579 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5580
5581 *task_info_count = POLICY_RR_BASE_COUNT;
5582 break;
5583 }
5584
5585 /* OBSOLETE */
5586 case TASK_SCHED_TIMESHARE_INFO:
5587 {
5588 policy_timeshare_base_t ts_base;
5589
5590 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5591 error = KERN_INVALID_ARGUMENT;
5592 break;
5593 }
5594
5595 ts_base = (policy_timeshare_base_t) task_info_out;
5596
5597 if (task == kernel_task) {
5598 error = KERN_INVALID_POLICY;
5599 break;
5600 }
5601
5602 ts_base->base_priority = task->priority;
5603
5604 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5605 break;
5606 }
5607
5608 case TASK_SECURITY_TOKEN:
5609 {
5610 security_token_t *sec_token_p;
5611
5612 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5613 error = KERN_INVALID_ARGUMENT;
5614 break;
5615 }
5616
5617 sec_token_p = (security_token_t *) task_info_out;
5618
5619 *sec_token_p = *task_get_sec_token(task);
5620
5621 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
5622 break;
5623 }
5624
5625 case TASK_AUDIT_TOKEN:
5626 {
5627 audit_token_t *audit_token_p;
5628
5629 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5630 error = KERN_INVALID_ARGUMENT;
5631 break;
5632 }
5633
5634 audit_token_p = (audit_token_t *) task_info_out;
5635
5636 *audit_token_p = *task_get_audit_token(task);
5637
5638 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
5639 break;
5640 }
5641
5642 case TASK_SCHED_INFO:
5643 error = KERN_INVALID_ARGUMENT;
5644 break;
5645
5646 case TASK_EVENTS_INFO:
5647 {
5648 task_events_info_t events_info;
5649 thread_t thread;
5650 uint64_t n_syscalls_mach, n_syscalls_unix, n_csw;
5651
5652 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5653 error = KERN_INVALID_ARGUMENT;
5654 break;
5655 }
5656
5657 events_info = (task_events_info_t) task_info_out;
5658
5659
5660 events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5661 events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5662 events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5663 events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5664 events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5665
5666 n_syscalls_mach = task->syscalls_mach;
5667 n_syscalls_unix = task->syscalls_unix;
5668 n_csw = task->c_switch;
5669
5670 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5671 n_csw += thread->c_switch;
5672 n_syscalls_mach += thread->syscalls_mach;
5673 n_syscalls_unix += thread->syscalls_unix;
5674 }
5675
5676 events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5677 events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5678 events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5679
5680 *task_info_count = TASK_EVENTS_INFO_COUNT;
5681 break;
5682 }
5683 case TASK_AFFINITY_TAG_INFO:
5684 {
5685 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5686 error = KERN_INVALID_ARGUMENT;
5687 break;
5688 }
5689
5690 error = task_affinity_info(task, task_info_out, task_info_count);
5691 break;
5692 }
5693 case TASK_POWER_INFO:
5694 {
5695 if (*task_info_count < TASK_POWER_INFO_COUNT) {
5696 error = KERN_INVALID_ARGUMENT;
5697 break;
5698 }
5699
5700 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5701 break;
5702 }
5703
5704 case TASK_POWER_INFO_V2:
5705 {
5706 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5707 error = KERN_INVALID_ARGUMENT;
5708 break;
5709 }
5710 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5711 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5712 break;
5713 }
5714
5715 case TASK_VM_INFO:
5716 case TASK_VM_INFO_PURGEABLE:
5717 {
5718 task_vm_info_t vm_info;
5719 vm_map_t map;
5720 ledger_amount_t tmp_amount;
5721
5722 struct proc *p;
5723 uint32_t platform, sdk;
5724 p = current_proc();
5725 platform = proc_platform(p);
5726 sdk = proc_sdk(p);
5727 if (original_task_info_count > TASK_VM_INFO_COUNT) {
5728 /*
5729 * Some iOS apps pass an incorrect value for
5730 * task_info_count, expressed in number of bytes
5731 * instead of number of "natural_t" elements, which
5732 * can lead to binary compatibility issues (including
5733 * stack corruption) when the data structure is
5734 * expanded in the future.
5735 * Let's make this potential issue visible by
5736 * logging about it...
5737 */
5738 printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5739 "task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5740 "%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5741 __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5742 flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5743 platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5744 DTRACE_VM4(suspicious_task_vm_info_count,
5745 mach_msg_type_number_t, original_task_info_count,
5746 mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5747 uint32_t, platform,
5748 uint32_t, sdk);
5749 }
5750 #if __arm64__
5751 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5752 platform == PLATFORM_IOS &&
5753 sdk != 0 &&
5754 (sdk >> 16) <= 12) {
5755 /*
5756 * Some iOS apps pass an incorrect value for
5757 * task_info_count, expressed in number of bytes
5758 * instead of number of "natural_t" elements.
5759 * For the sake of backwards binary compatibility
5760 * for apps built with an iOS12 or older SDK and using
5761 * the "rev2" data structure, let's fix task_info_count
5762 * for them, to avoid stomping past the actual end
5763 * of their buffer.
5764 */
5765 #if DEVELOPMENT || DEBUG
5766 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5767 "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5768 proc_name_address(p), original_task_info_count,
5769 TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16),
5770 ((sdk >> 8) & 0xff), (sdk & 0xff));
5771 #endif /* DEVELOPMENT || DEBUG */
5772 DTRACE_VM4(workaround_task_vm_info_count,
5773 mach_msg_type_number_t, original_task_info_count,
5774 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5775 uint32_t, platform,
5776 uint32_t, sdk);
5777 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5778 *task_info_count = original_task_info_count;
5779 }
5780 if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5781 platform == PLATFORM_IOS &&
5782 sdk != 0 &&
5783 (sdk >> 16) <= 15) {
5784 /*
5785 * Some iOS apps pass an incorrect value for
5786 * task_info_count, expressed in number of bytes
5787 * instead of number of "natural_t" elements.
5788 */
5789 printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5790 "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5791 proc_name_address(p), original_task_info_count,
5792 TASK_VM_INFO_REV5_COUNT, platform, (sdk >> 16),
5793 ((sdk >> 8) & 0xff), (sdk & 0xff));
5794 DTRACE_VM4(workaround_task_vm_info_count,
5795 mach_msg_type_number_t, original_task_info_count,
5796 mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5797 uint32_t, platform,
5798 uint32_t, sdk);
5799 #if DEVELOPMENT || DEBUG
5800 /*
5801 * For the sake of internal builds livability,
5802 * work around this user-space bug by capping the
5803 * buffer's size to what it was with the iOS15 SDK.
5804 */
5805 original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5806 *task_info_count = original_task_info_count;
5807 #endif /* DEVELOPMENT || DEBUG */
5808 }
5809 #endif /* __arm64__ */
5810
5811 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5812 error = KERN_INVALID_ARGUMENT;
5813 break;
5814 }
5815
5816 vm_info = (task_vm_info_t)task_info_out;
5817
5818 /*
5819 * Do not hold both the task and map locks,
5820 * so convert the task lock into a map reference,
5821 * drop the task lock, then lock the map.
5822 */
5823 if (is_kernel_task) {
5824 map = kernel_map;
5825 task_unlock(task);
5826 /* no lock, no reference */
5827 } else {
5828 map = task->map;
5829 vm_map_reference(map);
5830 task_unlock(task);
5831 vm_map_lock_read(map);
5832 }
5833
5834 vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5835 vm_info->region_count = map->hdr.nentries;
5836 vm_info->page_size = vm_map_page_size(map);
5837
5838 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5839 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5840
5841 vm_info->device = 0;
5842 vm_info->device_peak = 0;
5843 ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5844 ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5845 ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5846 ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5847 ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5848 ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5849 ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5850 ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5851 ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5852
5853 vm_info->purgeable_volatile_pmap = 0;
5854 vm_info->purgeable_volatile_resident = 0;
5855 vm_info->purgeable_volatile_virtual = 0;
5856 if (is_kernel_task) {
5857 /*
5858 * We do not maintain the detailed stats for the
5859 * kernel_pmap, so just count everything as
5860 * "internal"...
5861 */
5862 vm_info->internal = vm_info->resident_size;
5863 /*
5864 * ... but since the memory held by the VM compressor
5865 * in the kernel address space ought to be attributed
5866 * to user-space tasks, we subtract it from "internal"
5867 * to give memory reporting tools a more accurate idea
5868 * of what the kernel itself is actually using, instead
5869 * of making it look like the kernel is leaking memory
5870 * when the system is under memory pressure.
5871 */
5872 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5873 PAGE_SIZE);
5874 } else {
5875 mach_vm_size_t volatile_virtual_size;
5876 mach_vm_size_t volatile_resident_size;
5877 mach_vm_size_t volatile_compressed_size;
5878 mach_vm_size_t volatile_pmap_size;
5879 mach_vm_size_t volatile_compressed_pmap_size;
5880 kern_return_t kr;
5881
5882 if (flavor == TASK_VM_INFO_PURGEABLE) {
5883 kr = vm_map_query_volatile(
5884 map,
5885 &volatile_virtual_size,
5886 &volatile_resident_size,
5887 &volatile_compressed_size,
5888 &volatile_pmap_size,
5889 &volatile_compressed_pmap_size);
5890 if (kr == KERN_SUCCESS) {
5891 vm_info->purgeable_volatile_pmap =
5892 volatile_pmap_size;
5893 if (radar_20146450) {
5894 vm_info->compressed -=
5895 volatile_compressed_pmap_size;
5896 }
5897 vm_info->purgeable_volatile_resident =
5898 volatile_resident_size;
5899 vm_info->purgeable_volatile_virtual =
5900 volatile_virtual_size;
5901 }
5902 }
5903 }
5904 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5905
5906 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5907 /* must be captured while we still have the map lock */
5908 vm_info->min_address = map->min_offset;
5909 vm_info->max_address = map->max_offset;
5910 }
5911
5912 /*
5913 * Done with vm map things, can drop the map lock and reference,
5914 * and take the task lock back.
5915 *
5916 * Re-validate that the task didn't die on us.
5917 */
5918 if (!is_kernel_task) {
5919 vm_map_unlock_read(map);
5920 vm_map_deallocate(map);
5921 }
5922 map = VM_MAP_NULL;
5923
5924 task_lock(task);
5925
5926 if ((task != current_task()) && (!task->active)) {
5927 error = KERN_INVALID_ARGUMENT;
5928 break;
5929 }
5930
5931 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5932 vm_info->phys_footprint =
5933 (mach_vm_size_t) get_task_phys_footprint(task);
5934 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5935 }
5936 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5937 /* data was captured above */
5938 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5939 }
5940
5941 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5942 ledger_get_lifetime_max(task->ledger,
5943 task_ledgers.phys_footprint,
5944 &vm_info->ledger_phys_footprint_peak);
5945 ledger_get_balance(task->ledger,
5946 task_ledgers.purgeable_nonvolatile,
5947 &vm_info->ledger_purgeable_nonvolatile);
5948 ledger_get_balance(task->ledger,
5949 task_ledgers.purgeable_nonvolatile_compressed,
5950 &vm_info->ledger_purgeable_novolatile_compressed);
5951 ledger_get_balance(task->ledger,
5952 task_ledgers.purgeable_volatile,
5953 &vm_info->ledger_purgeable_volatile);
5954 ledger_get_balance(task->ledger,
5955 task_ledgers.purgeable_volatile_compressed,
5956 &vm_info->ledger_purgeable_volatile_compressed);
5957 ledger_get_balance(task->ledger,
5958 task_ledgers.network_nonvolatile,
5959 &vm_info->ledger_tag_network_nonvolatile);
5960 ledger_get_balance(task->ledger,
5961 task_ledgers.network_nonvolatile_compressed,
5962 &vm_info->ledger_tag_network_nonvolatile_compressed);
5963 ledger_get_balance(task->ledger,
5964 task_ledgers.network_volatile,
5965 &vm_info->ledger_tag_network_volatile);
5966 ledger_get_balance(task->ledger,
5967 task_ledgers.network_volatile_compressed,
5968 &vm_info->ledger_tag_network_volatile_compressed);
5969 ledger_get_balance(task->ledger,
5970 task_ledgers.media_footprint,
5971 &vm_info->ledger_tag_media_footprint);
5972 ledger_get_balance(task->ledger,
5973 task_ledgers.media_footprint_compressed,
5974 &vm_info->ledger_tag_media_footprint_compressed);
5975 ledger_get_balance(task->ledger,
5976 task_ledgers.media_nofootprint,
5977 &vm_info->ledger_tag_media_nofootprint);
5978 ledger_get_balance(task->ledger,
5979 task_ledgers.media_nofootprint_compressed,
5980 &vm_info->ledger_tag_media_nofootprint_compressed);
5981 ledger_get_balance(task->ledger,
5982 task_ledgers.graphics_footprint,
5983 &vm_info->ledger_tag_graphics_footprint);
5984 ledger_get_balance(task->ledger,
5985 task_ledgers.graphics_footprint_compressed,
5986 &vm_info->ledger_tag_graphics_footprint_compressed);
5987 ledger_get_balance(task->ledger,
5988 task_ledgers.graphics_nofootprint,
5989 &vm_info->ledger_tag_graphics_nofootprint);
5990 ledger_get_balance(task->ledger,
5991 task_ledgers.graphics_nofootprint_compressed,
5992 &vm_info->ledger_tag_graphics_nofootprint_compressed);
5993 ledger_get_balance(task->ledger,
5994 task_ledgers.neural_footprint,
5995 &vm_info->ledger_tag_neural_footprint);
5996 ledger_get_balance(task->ledger,
5997 task_ledgers.neural_footprint_compressed,
5998 &vm_info->ledger_tag_neural_footprint_compressed);
5999 ledger_get_balance(task->ledger,
6000 task_ledgers.neural_nofootprint,
6001 &vm_info->ledger_tag_neural_nofootprint);
6002 ledger_get_balance(task->ledger,
6003 task_ledgers.neural_nofootprint_compressed,
6004 &vm_info->ledger_tag_neural_nofootprint_compressed);
6005 *task_info_count = TASK_VM_INFO_REV3_COUNT;
6006 }
6007 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
6008 if (get_bsdtask_info(task)) {
6009 vm_info->limit_bytes_remaining =
6010 memorystatus_available_memory_internal(get_bsdtask_info(task));
6011 } else {
6012 vm_info->limit_bytes_remaining = 0;
6013 }
6014 *task_info_count = TASK_VM_INFO_REV4_COUNT;
6015 }
6016 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
6017 thread_t thread;
6018 uint64_t total = task->decompressions;
6019 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6020 total += thread->decompressions;
6021 }
6022 vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
6023 *task_info_count = TASK_VM_INFO_REV5_COUNT;
6024 }
6025 if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
6026 ledger_get_balance(task->ledger, task_ledgers.swapins,
6027 &vm_info->ledger_swapins);
6028 *task_info_count = TASK_VM_INFO_REV6_COUNT;
6029 }
6030
6031 break;
6032 }
6033
6034 case TASK_WAIT_STATE_INFO:
6035 {
6036 /*
6037 * Deprecated flavor. Currently allowing some results until all users
6038 * stop calling it. The results may not be accurate.
6039 */
6040 task_wait_state_info_t wait_state_info;
6041 uint64_t total_sfi_ledger_val = 0;
6042
6043 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
6044 error = KERN_INVALID_ARGUMENT;
6045 break;
6046 }
6047
6048 wait_state_info = (task_wait_state_info_t) task_info_out;
6049
6050 wait_state_info->total_wait_state_time = 0;
6051 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
6052
6053 #if CONFIG_SCHED_SFI
6054 int i, prev_lentry = -1;
6055 int64_t val_credit, val_debit;
6056
6057 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
6058 val_credit = 0;
6059 /*
6060 * checking with prev_lentry != entry ensures adjacent classes
6061 * which share the same ledger do not add wait times twice.
6062 * Note: Use ledger() call to get data for each individual sfi class.
6063 */
6064 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
6065 KERN_SUCCESS == ledger_get_entries(task->ledger,
6066 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
6067 total_sfi_ledger_val += val_credit;
6068 }
6069 prev_lentry = task_ledgers.sfi_wait_times[i];
6070 }
6071
6072 #endif /* CONFIG_SCHED_SFI */
6073 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
6074 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
6075
6076 break;
6077 }
6078 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
6079 {
6080 #if DEVELOPMENT || DEBUG
6081 pvm_account_info_t acnt_info;
6082
6083 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
6084 error = KERN_INVALID_ARGUMENT;
6085 break;
6086 }
6087
6088 if (task_info_out == NULL) {
6089 error = KERN_INVALID_ARGUMENT;
6090 break;
6091 }
6092
6093 acnt_info = (pvm_account_info_t) task_info_out;
6094
6095 error = vm_purgeable_account(task, acnt_info);
6096
6097 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
6098
6099 break;
6100 #else /* DEVELOPMENT || DEBUG */
6101 error = KERN_NOT_SUPPORTED;
6102 break;
6103 #endif /* DEVELOPMENT || DEBUG */
6104 }
6105 case TASK_FLAGS_INFO:
6106 {
6107 task_flags_info_t flags_info;
6108
6109 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
6110 error = KERN_INVALID_ARGUMENT;
6111 break;
6112 }
6113
6114 flags_info = (task_flags_info_t)task_info_out;
6115
6116 /* only publish the 64-bit flag of the task */
6117 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
6118
6119 *task_info_count = TASK_FLAGS_INFO_COUNT;
6120 break;
6121 }
6122
6123 case TASK_DEBUG_INFO_INTERNAL:
6124 {
6125 #if DEVELOPMENT || DEBUG
6126 task_debug_info_internal_t dbg_info;
6127 ipc_space_t space = task->itk_space;
6128 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
6129 error = KERN_NOT_SUPPORTED;
6130 break;
6131 }
6132
6133 if (task_info_out == NULL) {
6134 error = KERN_INVALID_ARGUMENT;
6135 break;
6136 }
6137 dbg_info = (task_debug_info_internal_t) task_info_out;
6138 dbg_info->ipc_space_size = 0;
6139
6140 if (space) {
6141 smr_ipc_enter();
6142 ipc_entry_table_t table = smr_entered_load(&space->is_table);
6143 if (table) {
6144 dbg_info->ipc_space_size =
6145 ipc_entry_table_count(table);
6146 }
6147 smr_ipc_leave();
6148 }
6149
6150 dbg_info->suspend_count = task->suspend_count;
6151
6152 error = KERN_SUCCESS;
6153 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
6154 break;
6155 #else /* DEVELOPMENT || DEBUG */
6156 error = KERN_NOT_SUPPORTED;
6157 break;
6158 #endif /* DEVELOPMENT || DEBUG */
6159 }
6160 case TASK_SUSPEND_STATS_INFO:
6161 {
6162 #if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6163 if (*task_info_count < TASK_SUSPEND_STATS_INFO_COUNT || task_info_out == NULL) {
6164 error = KERN_INVALID_ARGUMENT;
6165 break;
6166 }
6167 error = _task_get_suspend_stats_locked(task, (task_suspend_stats_t)task_info_out);
6168 *task_info_count = TASK_SUSPEND_STATS_INFO_COUNT;
6169 break;
6170 #else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6171 error = KERN_NOT_SUPPORTED;
6172 break;
6173 #endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6174 }
6175 case TASK_SUSPEND_SOURCES_INFO:
6176 {
6177 #if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6178 if (*task_info_count < TASK_SUSPEND_SOURCES_INFO_COUNT || task_info_out == NULL) {
6179 error = KERN_INVALID_ARGUMENT;
6180 break;
6181 }
6182 error = _task_get_suspend_sources_locked(task, (task_suspend_source_t)task_info_out);
6183 *task_info_count = TASK_SUSPEND_SOURCES_INFO_COUNT;
6184 break;
6185 #else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6186 error = KERN_NOT_SUPPORTED;
6187 break;
6188 #endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6189 }
6190 default:
6191 error = KERN_INVALID_ARGUMENT;
6192 }
6193
6194 task_unlock(task);
6195 return error;
6196 }
6197
6198 /*
6199 * task_info_from_user
6200 *
6201 * When calling task_info from user space,
6202 * this function will be executed as mig server side
6203 * instead of calling directly into task_info.
6204 * This gives the possibility to perform more security
6205 * checks on task_port.
6206 *
6207 * In the case of TASK_DYLD_INFO, we require the more
6208 * privileged task_read_port not the less-privileged task_name_port.
6209 *
6210 */
6211 kern_return_t
task_info_from_user(mach_port_t task_port,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)6212 task_info_from_user(
6213 mach_port_t task_port,
6214 task_flavor_t flavor,
6215 task_info_t task_info_out,
6216 mach_msg_type_number_t *task_info_count)
6217 {
6218 task_t task;
6219 kern_return_t ret;
6220
6221 if (flavor == TASK_DYLD_INFO) {
6222 task = convert_port_to_task_read(task_port);
6223 } else {
6224 task = convert_port_to_task_name(task_port);
6225 }
6226
6227 ret = task_info(task, flavor, task_info_out, task_info_count);
6228
6229 task_deallocate(task);
6230
6231 return ret;
6232 }
6233
6234 /*
6235 * Routine: task_dyld_process_info_update_helper
6236 *
6237 * Release send rights in release_ports.
6238 *
6239 * If no active ports found in task's dyld notifier array, unset the magic value
6240 * in user space to indicate so.
6241 *
6242 * Condition:
6243 * task's itk_lock is locked, and is unlocked upon return.
6244 * Global g_dyldinfo_mtx is locked, and is unlocked upon return.
6245 */
void
task_dyld_process_info_update_helper(
	task_t          task,
	size_t          active_count,
	vm_map_address_t magic_addr, /* a userspace address */
	ipc_port_t      *release_ports,
	size_t          release_count)
{
	void *notifiers_ptr = NULL;

	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);

	if (active_count == 0) {
		/*
		 * No active notifier port remains: detach and free the array,
		 * and clear the userspace magic so dyld stops issuing
		 * notifications.  The array pointer must be snapshotted and
		 * cleared under the itk lock before dropping it.
		 */
		assert(task->itk_dyld_notify != NULL);
		notifiers_ptr = task->itk_dyld_notify;
		task->itk_dyld_notify = NULL;
		itk_unlock(task);

		kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
		(void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
	} else {
		itk_unlock(task);
		(void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
		    magic_addr); /* reset magic */
	}

	/* Both locks taken by the caller are released before the sends are dropped. */
	lck_mtx_unlock(&g_dyldinfo_mtx);

	/* Release the send rights the caller collected for us. */
	for (size_t i = 0; i < release_count; i++) {
		ipc_port_release_send(release_ports[i]);
	}
}
6278
6279 /*
6280 * Routine: task_dyld_process_info_notify_register
6281 *
6282 * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
6283 * memory for the array if it's the first port to be registered. Also cleanup
6284 * any dead rights found in the array.
6285 *
6286 * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
6287 *
6288 * Args:
6289 * task: Target task for the registration.
6290 * sright: A send right.
6291 *
6292 * Returns:
6293 * KERN_SUCCESS: Registration succeeded.
6294 * KERN_INVALID_TASK: task is invalid.
6295 * KERN_INVALID_RIGHT: sright is invalid.
6296 * KERN_DENIED: Security policy denied this call.
6297 * KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
6298 * KERN_NO_SPACE: No available notifier port slot left for this task.
6299 * KERN_RIGHT_EXISTS: The notifier port is already registered and active.
6300 *
6301 * Other error code see task_info().
6302 *
6303 * See Also:
6304 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6305 */
kern_return_t
task_dyld_process_info_notify_register(
	task_t          task,
	ipc_port_t      sright)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	mach_vm_address_t ports_addr; /* a user space address */
	kern_return_t kr;
	boolean_t right_exists = false;
	ipc_port_t *notifiers_ptr = NULL;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!IP_VALID(sright)) {
		return KERN_INVALID_RIGHT;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* Locate the notifyMachPorts magic inside the target's all_image_infos. */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

retry:
	/*
	 * Allocate the notifier array before taking any locks, since the
	 * allocation may block (Z_WAITOK).  Z_NOFAIL means it cannot
	 * return NULL.
	 */
	if (task->itk_dyld_notify == NULL) {
		notifiers_ptr = kalloc_type(ipc_port_t,
		    DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	}

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		if (notifiers_ptr == NULL) {
			/*
			 * The array disappeared between the unlocked check and
			 * acquiring the locks, and we did not pre-allocate.
			 * Drop the locks and go allocate one.
			 */
			itk_unlock(task);
			lck_mtx_unlock(&g_dyldinfo_mtx);
			goto retry;
		}
		/* Install the freshly allocated array; ownership transfers to the task. */
		task->itk_dyld_notify = notifiers_ptr;
		notifiers_ptr = NULL;
	}

	assert(task->itk_dyld_notify != NULL);
	/* First pass: clear dead names and check for duplicate registration */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
			/* Dead right: queue it for release once the locks are dropped. */
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		} else if (*portp == sright) {
			/* the port is already registered and is active */
			right_exists = true;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	if (right_exists) {
		/* skip second pass */
		kr = KERN_RIGHT_EXISTS;
		goto out;
	}

	/* Second pass: register the port */
	kr = KERN_NO_SPACE;
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == IPC_PORT_NULL) {
			*portp = sright;
			active_count++;
			kr = KERN_SUCCESS;
			break;
		}
	}

out:
	assert(active_count > 0);

	/* The helper drops both locks and releases any queued dead rights. */
	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Free the pre-allocated array if another thread installed one first. */
	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);

	return kr;
}
6414
6415 /*
6416 * Routine: task_dyld_process_info_notify_deregister
6417 *
6418 * Remove a send right in target task's itk_dyld_notify array matching the receive
6419 * right name passed in. Deallocate kernel memory for the array if it's the last port to
6420 * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
6421 *
6422 * Does not consume any reference.
6423 *
6424 * Args:
6425 * task: Target task for the deregistration.
6426 * rcv_name: The name denoting the receive right in caller's space.
6427 *
6428 * Returns:
 *     KERN_SUCCESS: A matching entry was found and deregistration succeeded.
6430 * KERN_INVALID_TASK: task is invalid.
6431 * KERN_INVALID_NAME: name is invalid.
6432 * KERN_DENIED: Security policy denied this call.
6433 * KERN_FAILURE: A matching entry is not found.
6434 * KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6435 *
6436 * Other error code see task_info().
6437 *
6438 * See Also:
6439 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6440 */
kern_return_t
task_dyld_process_info_notify_deregister(
	task_t          task,
	mach_port_name_t rcv_name)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	boolean_t port_found = false;
	mach_vm_address_t ports_addr; /* a user space address */
	ipc_port_t sright;
	kern_return_t kr;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!MACH_PORT_VALID(rcv_name)) {
		return KERN_INVALID_NAME;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* Locate the notifyMachPorts magic inside the target's all_image_infos. */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/* Map the caller's receive-right name to the underlying port. */
	kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
	if (kr) {
		return KERN_INVALID_RIGHT;
	}

	/* Take our own reference before unlocking, so the port can't go away. */
	ip_reference(sright);
	ip_mq_unlock(sright);

	assert(sright != IPC_PORT_NULL);

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		/* Nothing registered on this task: nothing to deregister. */
		itk_unlock(task);
		lck_mtx_unlock(&g_dyldinfo_mtx);
		ip_release(sright);
		return KERN_FAILURE;
	}

	/* Single pass: remove the matching entry and sweep dead rights. */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == sright) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
			port_found = true;
		} else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
			/* Opportunistically clean up a dead right. */
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	/* The helper drops both locks and releases the collected send rights. */
	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* Drop the reference taken after translation above. */
	ip_release(sright);

	return port_found ? KERN_SUCCESS : KERN_FAILURE;
}
6527
6528 /*
6529 * task_power_info
6530 *
6531 * Returns power stats for the task.
6532 * Note: Called with task locked.
6533 */
void
task_power_info_locked(
	task_t                  task,
	task_power_info_t       info,
	gpu_energy_data_t       ginfo,
	task_power_info_v2_t    infov2,
	struct task_power_info_extra *extra_info)
{
	thread_t                thread;
	ledger_amount_t         tmp;

	uint64_t                runnable_time_sum = 0;

	/* Caller must hold the task lock for the duration of the collection. */
	task_lock_assert_owned(task);

	/* Wakeup counts come from the task ledgers (credit side only). */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	/* Start from the task-level totals; live threads are added below. */
	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	struct recount_usage usage = { 0 };
	struct recount_usage usage_perf = { 0 };
	recount_task_usage_perf_only(task, &usage, &usage_perf);

	info->total_user = usage.ru_metrics[RCT_LVL_USER].rm_time_mach;
	info->total_system = recount_usage_system_time_mach(&usage);
	runnable_time_sum = task->total_runnable_time;

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		infov2->task_ptime = recount_usage_time_mach(&usage_perf);
		infov2->task_pset_switches = task->ps_switch;
#if CONFIG_PERVASIVE_ENERGY
		infov2->task_energy = usage.ru_energy_nj;
#endif /* CONFIG_PERVASIVE_ENERGY */
	}

	/*
	 * Accumulate per-thread contributions.  Each thread is sampled under
	 * its own thread lock at splsched to get a consistent snapshot.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;

		/* Idle threads are not billed to the task. */
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

		if (infov2) {
			infov2->task_pset_switches += thread->ps_switch;
		}

		runnable_time_sum += timer_grab(&thread->runnable_timer);

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}

	if (extra_info) {
		extra_info->runnable_time = runnable_time_sum;
#if CONFIG_PERVASIVE_CPI
		extra_info->cycles = recount_usage_cycles(&usage);
		extra_info->instructions = recount_usage_instructions(&usage);
		extra_info->pcycles = recount_usage_cycles(&usage_perf);
		extra_info->pinstructions = recount_usage_instructions(&usage_perf);
		extra_info->user_ptime = usage_perf.ru_metrics[RCT_LVL_USER].rm_time_mach;
		extra_info->system_ptime = recount_usage_system_time_mach(&usage_perf);
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
		extra_info->energy = usage.ru_energy_nj;
		extra_info->penergy = usage_perf.ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
#if RECOUNT_SECURE_METRICS
		/* Secure-world time is only disclosed on debug-enabled systems. */
		if (PE_i_can_has_debugger(NULL)) {
			extra_info->secure_time = usage.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
			extra_info->secure_ptime = usage_perf.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
		}
#endif // RECOUNT_SECURE_METRICS
	}
}
6625
6626 /*
6627 * task_gpu_utilisation
6628 *
6629 * Returns the total gpu time used by the all the threads of the task
6630 * (both dead and alive)
6631 */
6632 uint64_t
task_gpu_utilisation(task_t task)6633 task_gpu_utilisation(
6634 task_t task)
6635 {
6636 uint64_t gpu_time = 0;
6637 #if defined(__x86_64__)
6638 thread_t thread;
6639
6640 task_lock(task);
6641 gpu_time += task->task_gpu_ns;
6642
6643 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6644 spl_t x;
6645 x = splsched();
6646 thread_lock(thread);
6647 gpu_time += ml_gpu_stat(thread);
6648 thread_unlock(thread);
6649 splx(x);
6650 }
6651
6652 task_unlock(task);
6653 #else /* defined(__x86_64__) */
6654 /* silence compiler warning */
6655 (void)task;
6656 #endif /* defined(__x86_64__) */
6657 return gpu_time;
6658 }
6659
/*
 * task_update_cpu_time_qos_stats:
 *
 * Accumulate this task's per-QoS-class CPU time into the caller-supplied
 * arrays, indexed by the THREAD_QOS_* constants.  eqos_stats receives time
 * binned by effective QoS, rqos_stats by requested QoS; either pointer may
 * be NULL to skip that breakdown.  Values are added to (not stored into)
 * the arrays, so callers can aggregate across tasks.
 */
void
task_update_cpu_time_qos_stats(
	task_t task,
	uint64_t *eqos_stats,
	uint64_t *rqos_stats)
{
	/* No output requested: avoid taking the task lock at all. */
	if (!eqos_stats && !rqos_stats) {
		return;
	}

	task_lock(task);
	thread_t thread;
	/*
	 * Flush each live thread's pending QoS CPU time into the task-level
	 * counters before reading them.  Idle threads carry no useful QoS
	 * accounting and are skipped.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		thread_update_qos_cpu_time(thread);
	}

	if (eqos_stats) {
		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
	}

	if (rqos_stats) {
		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
	}

	task_unlock(task);
}
6705
6706 kern_return_t
task_purgable_info(task_t task,task_purgable_info_t * stats)6707 task_purgable_info(
6708 task_t task,
6709 task_purgable_info_t *stats)
6710 {
6711 if (task == TASK_NULL || stats == NULL) {
6712 return KERN_INVALID_ARGUMENT;
6713 }
6714 /* Take task reference */
6715 task_reference(task);
6716 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6717 /* Drop task reference */
6718 task_deallocate(task);
6719 return KERN_SUCCESS;
6720 }
6721
/*
 * task_vtimer_set:
 *
 * Arm one of the task's virtual timers (TASK_VTIMER_USER, TASK_VTIMER_PROF
 * or TASK_VTIMER_RLIM) and snapshot every thread's current accumulated
 * time as the baseline that task_vtimer_update() will diff against.
 */
void
task_vtimer_set(
	task_t task,
	integer_t which)
{
	thread_t thread;
	spl_t x;

	task_lock(task);

	task->vtimers |= which;

	switch (which) {
	case TASK_VTIMER_USER:
		/* Baseline is user-mode time only. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			struct recount_times_mach times = recount_thread_times(thread);
			thread->vtimer_user_save = times.rtm_user;
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_PROF:
		/* Baseline is total thread time (per recount_thread_time_mach). */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_prof_save = recount_thread_time_mach(thread);
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_RLIM:
		/* Baseline is total thread time, used for CPU rlimit accounting. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_rlim_save = recount_thread_time_mach(thread);
			thread_unlock(thread);
			splx(x);
		}
		break;
	}

	task_unlock(task);
}
6769
6770 void
task_vtimer_clear(task_t task,integer_t which)6771 task_vtimer_clear(
6772 task_t task,
6773 integer_t which)
6774 {
6775 task_lock(task);
6776
6777 task->vtimers &= ~which;
6778
6779 task_unlock(task);
6780 }
6781
/*
 * task_vtimer_update:
 *
 * Report, through *microsecs, how much time the current thread has
 * accumulated against the given vtimer since the last update, and advance
 * the saved baseline.  Must be called on the current task.  Returns early
 * (leaving *microsecs untouched) if the requested timer bits are not all
 * armed, so callers should pre-initialize *microsecs.
 */
void
task_vtimer_update(
	__unused
	task_t task,
	integer_t which,
	uint32_t *microsecs)
{
	thread_t thread = current_thread();
	uint32_t tdelt = 0;
	clock_sec_t secs = 0;
	uint64_t tsum;

	assert(task == current_task());

	spl_t s = splsched();
	thread_lock(thread);

	/* All requested timer bits must be armed on the task. */
	if ((task->vtimers & which) != (uint32_t)which) {
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {
	case TASK_VTIMER_USER:;
		/* Delta of user-mode time since the last snapshot. */
		struct recount_times_mach times = recount_thread_times(thread);
		tsum = times.rtm_user;
		/* Deltas are assumed to fit in 32 bits between updates. */
		tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
		thread->vtimer_user_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0) {
			thread->vtimer_prof_save = tsum;
		}
		break;

	case TASK_VTIMER_RLIM:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}
6835
6836 uint64_t
get_task_dispatchqueue_offset(task_t task)6837 get_task_dispatchqueue_offset(
6838 task_t task)
6839 {
6840 return task->dispatchqueue_offset;
6841 }
6842
6843 void
task_synchronizer_destroy_all(task_t task)6844 task_synchronizer_destroy_all(task_t task)
6845 {
6846 /*
6847 * Destroy owned semaphores
6848 */
6849 semaphore_destroy_all(task);
6850 }
6851
6852 /*
6853 * Install default (machine-dependent) initial thread state
6854 * on the task. Subsequent thread creation will have this initial
6855 * state set on the thread by machine_thread_inherit_taskwide().
6856 * Flavors and structures are exactly the same as those to thread_set_state()
6857 */
6858 kern_return_t
task_set_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t state_count)6859 task_set_state(
6860 task_t task,
6861 int flavor,
6862 thread_state_t state,
6863 mach_msg_type_number_t state_count)
6864 {
6865 kern_return_t ret;
6866
6867 if (task == TASK_NULL) {
6868 return KERN_INVALID_ARGUMENT;
6869 }
6870
6871 task_lock(task);
6872
6873 if (!task->active) {
6874 task_unlock(task);
6875 return KERN_FAILURE;
6876 }
6877
6878 ret = machine_task_set_state(task, flavor, state, state_count);
6879
6880 task_unlock(task);
6881 return ret;
6882 }
6883
6884 /*
6885 * Examine the default (machine-dependent) initial thread state
6886 * on the task, as set by task_set_state(). Flavors and structures
6887 * are exactly the same as those passed to thread_get_state().
6888 */
6889 kern_return_t
task_get_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)6890 task_get_state(
6891 task_t task,
6892 int flavor,
6893 thread_state_t state,
6894 mach_msg_type_number_t *state_count)
6895 {
6896 kern_return_t ret;
6897
6898 if (task == TASK_NULL) {
6899 return KERN_INVALID_ARGUMENT;
6900 }
6901
6902 task_lock(task);
6903
6904 if (!task->active) {
6905 task_unlock(task);
6906 return KERN_FAILURE;
6907 }
6908
6909 ret = machine_task_get_state(task, flavor, state, state_count);
6910
6911 task_unlock(task);
6912 return ret;
6913 }
6914
6915
/*
 * Deliver an EXC_GUARD exception (with a corpse) against the current task.
 * Marked noinline / not_tail_called so the function name shows up in
 * backtraces of guard violations.
 */
static kern_return_t __attribute__((noinline, not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	boolean_t backtrace_only)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid()) {
		return KERN_NOT_SUPPORTED; // initproc is immune
	}
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;
	void *bsd_info = get_bsdtask_info(task);

	/* (See jetsam-related comments below) */

	/* Keep jetsam away from the task while the corpse is collected. */
	proc_memstat_skip(bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason, backtrace_only);
	proc_memstat_skip(bsd_info, FALSE);
	return kr;
}
6943
6944 kern_return_t
task_violated_guard(mach_exception_code_t code,mach_exception_subcode_t subcode,void * reason,bool backtrace_only)6945 task_violated_guard(
6946 mach_exception_code_t code,
6947 mach_exception_subcode_t subcode,
6948 void *reason,
6949 bool backtrace_only)
6950 {
6951 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
6952 }
6953
6954
6955 #if CONFIG_MEMORYSTATUS
6956
6957 boolean_t
task_get_memlimit_is_active(task_t task)6958 task_get_memlimit_is_active(task_t task)
6959 {
6960 assert(task != NULL);
6961
6962 if (task->memlimit_is_active == 1) {
6963 return TRUE;
6964 } else {
6965 return FALSE;
6966 }
6967 }
6968
6969 void
task_set_memlimit_is_active(task_t task,boolean_t memlimit_is_active)6970 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6971 {
6972 assert(task != NULL);
6973
6974 if (memlimit_is_active) {
6975 task->memlimit_is_active = 1;
6976 } else {
6977 task->memlimit_is_active = 0;
6978 }
6979 }
6980
6981 boolean_t
task_get_memlimit_is_fatal(task_t task)6982 task_get_memlimit_is_fatal(task_t task)
6983 {
6984 assert(task != NULL);
6985
6986 if (task->memlimit_is_fatal == 1) {
6987 return TRUE;
6988 } else {
6989 return FALSE;
6990 }
6991 }
6992
6993 void
task_set_memlimit_is_fatal(task_t task,boolean_t memlimit_is_fatal)6994 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6995 {
6996 assert(task != NULL);
6997
6998 if (memlimit_is_fatal) {
6999 task->memlimit_is_fatal = 1;
7000 } else {
7001 task->memlimit_is_fatal = 0;
7002 }
7003 }
7004
7005 uint64_t
task_get_dirty_start(task_t task)7006 task_get_dirty_start(task_t task)
7007 {
7008 return task->memstat_dirty_start;
7009 }
7010
7011 void
task_set_dirty_start(task_t task,uint64_t start)7012 task_set_dirty_start(task_t task, uint64_t start)
7013 {
7014 task_lock(task);
7015 task->memstat_dirty_start = start;
7016 task_unlock(task);
7017 }
7018
7019 boolean_t
task_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)7020 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7021 {
7022 boolean_t triggered = FALSE;
7023
7024 assert(task == current_task());
7025
7026 /*
7027 * Returns true, if task has already triggered an exc_resource exception.
7028 */
7029
7030 if (memlimit_is_active) {
7031 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
7032 } else {
7033 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
7034 }
7035
7036 return triggered;
7037 }
7038
7039 void
task_mark_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)7040 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7041 {
7042 assert(task == current_task());
7043
7044 /*
7045 * We allow one exc_resource per process per active/inactive limit.
7046 * The limit's fatal attribute does not come into play.
7047 */
7048
7049 if (memlimit_is_active) {
7050 task->memlimit_active_exc_resource = 1;
7051 } else {
7052 task->memlimit_inactive_exc_resource = 1;
7053 }
7054 }
7055
7056 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
7057
/*
 * PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND:
 *
 * Handle the current task crossing its memory high watermark (or, when
 * EXEC_RESOURCE_DIAGNOSTIC is set in exception_options, its diagnostics
 * memory threshold).  Optionally takes a user core dump, then delivers
 * an EXC_RESOURCE exception either synchronously (suspending the task
 * around the exception) or asynchronously via a corpse fork.
 */
void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options)
{
	task_t task = current_task();
	int pid = 0;
	const char *procname = "unknown";
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	boolean_t send_sync_exc_resource = FALSE;
	void *cur_bsd_info = get_bsdtask_info(current_task());

#ifdef MACH_BSD
	pid = proc_selfpid();

	if (pid == 1) {
		/*
		 * Cannot have ReportCrash analyzing
		 * a suspended initproc.
		 */
		return;
	}

	if (cur_bsd_info != NULL) {
		procname = proc_name_address(cur_bsd_info);
		send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(cur_bsd_info);
	}
#endif
#if CONFIG_COREDUMP
	if (hwm_user_cores) {
		int error;
		uint64_t starttime, end;
		clock_sec_t secs = 0;
		uint32_t microsecs = 0;

		starttime = mach_absolute_time();
		/*
		 * Trigger a coredump of this process. Don't proceed unless we know we won't
		 * be filling up the disk; and ignore the core size resource limit for this
		 * core file.
		 */
		if ((error = coredump(cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
		}
		/*
		 * coredump() leaves the task suspended.
		 */
		task_resume_internal(current_task());

		end = mach_absolute_time();
		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
		    proc_name_address(cur_bsd_info), pid, (int)secs, microsecs);
	}
#endif /* CONFIG_COREDUMP */

	if (disable_exc_resource) {
		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
		    "suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
		return;
	}
	printf("process %s [%d] crossed memory %s (%d MB); EXC_RESOURCE "
	    "\n", procname, pid, (!(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? "high watermark" : "diagnostics limit"), max_footprint_mb);

	/*
	 * A task that has triggered an EXC_RESOURCE, should not be
	 * jetsammed when the device is under memory pressure. Here
	 * we set the P_MEMSTAT_SKIP flag so that the process
	 * will be skipped if the memorystatus_thread wakes up.
	 *
	 * This is a debugging aid to ensure we can get a corpse before
	 * the jetsam thread kills the process.
	 * Note that proc_memstat_skip is a no-op on release kernels.
	 */
	proc_memstat_skip(cur_bsd_info, TRUE);

	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	/*
	 * Regardless of whether there was a diagnostics-limit violation, fatal
	 * exceptions are always reported as high-watermark violations.  In other
	 * words, if both a diag limit and a watermark exist and the violation is
	 * for the watermark, a watermark flavor is reported.
	 */
	if (!(exception_options & EXEC_RESOURCE_FATAL)) {
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], !(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? FLAVOR_HIGH_WATERMARK : FLAVOR_DIAG_MEMLIMIT);
	} else {
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK );
	}
	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
	/*
	 * Do not generate a corpse fork if the violation is a fatal one
	 * or the process wants synchronous EXC_RESOURCE exceptions.
	 */
	if ((exception_options & EXEC_RESOURCE_FATAL) || send_sync_exc_resource || !exc_via_corpse_forking) {
		if (exception_options & EXEC_RESOURCE_FATAL) {
			vm_map_set_corpse_source(task->map);
		}

		/* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
		if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
			/*
			 * Use the _internal_ variant so that no user-space
			 * process can resume our task from under us.
			 */
			task_suspend_internal(task);
			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
			task_resume_internal(task);
		}
	} else {
		if (disable_exc_resource_during_audio && audio_active) {
			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			    "suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
		} else {
			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
			    code, EXCEPTION_CODE_MAX, NULL, FALSE);
		}
	}

	/*
	 * After the EXC_RESOURCE has been handled, we must clear the
	 * P_MEMSTAT_SKIP flag so that the process can again be
	 * considered for jetsam if the memorystatus_thread wakes up.
	 */
	proc_memstat_skip(cur_bsd_info, FALSE); /* clear the flag */
}
/*
 * task_footprint_exceeded:
 *
 * Ledger callback invoked when the current task exceeds its physical
 * footprint limit, or one of the associated warning / diagnostics
 * thresholds.  `warning` is a LEDGER_WARNING_* code (0 for a hard
 * limit violation); param0/param1 are unused.
 */
void
task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	ledger_amount_t max_footprint = 0;
	ledger_amount_t max_footprint_mb = 0;
#if DEBUG || DEVELOPMENT
	ledger_amount_t diag_threshold_limit_mb = 0;
	ledger_amount_t diag_threshold_limit = 0;
#endif
#if CONFIG_DEFERRED_RECLAIM
	ledger_amount_t current_footprint;
#endif /* CONFIG_DEFERRED_RECLAIM */
	task_t task;
	send_exec_resource_is_warning is_warning = IS_NOT_WARNING;
	boolean_t memlimit_is_active;
	send_exec_resource_is_fatal memlimit_is_fatal;
	send_exec_resource_is_diagnostics is_diag_mem_threshold = IS_NOT_DIAGNOSTICS;
	if (warning == LEDGER_WARNING_DIAG_MEM_THRESHOLD) {
		/* Diagnostics threshold crossings are always treated as warnings. */
		is_diag_mem_threshold = IS_DIAGNOSTICS;
		is_warning = IS_WARNING;
	} else if (warning == LEDGER_WARNING_DIPPED_BELOW) {
		/*
		 * Task memory limits only provide a warning on the way up.
		 */
		return;
	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
		/*
		 * This task is in danger of violating a memory limit,
		 * It has exceeded a percentage level of the limit.
		 */
		is_warning = IS_WARNING;
	} else {
		/*
		 * The task has exceeded the physical footprint limit.
		 * This is not a warning but a true limit violation.
		 */
		is_warning = IS_NOT_WARNING;
	}

	task = current_task();

	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
#if DEBUG || DEVELOPMENT
	ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &diag_threshold_limit);
#endif
#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		/*
		 * Task is enrolled in deferred reclamation.
		 * Do a reclaim to ensure it's really over its limit.
		 */
		vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
		ledger_get_balance(task->ledger, task_ledgers.phys_footprint, &current_footprint);
		if (current_footprint < max_footprint) {
			return;
		}
	}
#endif /* CONFIG_DEFERRED_RECLAIM */
	max_footprint_mb = max_footprint >> 20;
#if DEBUG || DEVELOPMENT
	diag_threshold_limit_mb = diag_threshold_limit >> 20;
#endif
	memlimit_is_active = task_get_memlimit_is_active(task);
	memlimit_is_fatal = task_get_memlimit_is_fatal(task) == FALSE ? IS_NOT_FATAL : IS_FATAL;
	/* Route to the diagnostics or regular-limit handler. */
#if DEBUG || DEVELOPMENT
	if (is_diag_mem_threshold == IS_NOT_DIAGNOSTICS) {
		task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
	} else {
		task_process_crossed_limit_diag(diag_threshold_limit_mb);
	}
#else
	task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
#endif
}
7258
/*
 * Actions to perform when a process has crossed its memory watermark or
 * committed a fatal memory-limit violation (non-diagnostics path).
 */
static inline void
task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning)
{
	send_exec_resource_options_t exception_options = 0;
	if (memlimit_is_fatal) {
		exception_options |= EXEC_RESOURCE_FATAL;
	}
	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * We only generate the exception once per process per memlimit (active/inactive limit).
	 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
	 * and we disable it by marking that memlimit as exception triggered.
	 */
	if (is_warning == IS_NOT_WARNING && !task_has_triggered_exc_resource(task, memlimit_is_active)) {
		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
		/*
		 * A memory limit fires at most once per active/inactive state; a
		 * diagnostics limit, by contrast, can be reloaded by the user and
		 * signal the violation again later.
		 */
		memorystatus_log_exception((int)ledger_limit_size, memlimit_is_active, memlimit_is_fatal);
		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
	}
	memorystatus_on_ledger_footprint_exceeded(is_warning == IS_NOT_WARNING ? FALSE : TRUE, memlimit_is_active, memlimit_is_fatal);
}
7283
7284 #if DEBUG || DEVELOPMENT
/**
 * Actions to take when a process has crossed its diagnostics memory
 * threshold: signal EXC_RESOURCE (diagnostics flavor) and log.  The
 * exception is signaled only once per arming; the inhibit/re-arm
 * mechanism is performed at the ledger level.
 */
static inline void
task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size)
{
	send_exec_resource_options_t exception_options = EXEC_RESOURCE_DIAGNOSTIC;
	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
	memorystatus_log_diag_threshold_exception((int)ledger_limit_size);
}
7300 #endif
7301
7302 extern int proc_check_footprint_priv(void);
7303
7304 kern_return_t
task_set_phys_footprint_limit(task_t task,int new_limit_mb,int * old_limit_mb)7305 task_set_phys_footprint_limit(
7306 task_t task,
7307 int new_limit_mb,
7308 int *old_limit_mb)
7309 {
7310 kern_return_t error;
7311
7312 boolean_t memlimit_is_active;
7313 boolean_t memlimit_is_fatal;
7314
7315 if ((error = proc_check_footprint_priv())) {
7316 return KERN_NO_ACCESS;
7317 }
7318
7319 /*
7320 * This call should probably be obsoleted.
7321 * But for now, we default to current state.
7322 */
7323 memlimit_is_active = task_get_memlimit_is_active(task);
7324 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
7325
7326 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
7327 }
7328
7329 /*
7330 * Set the limit of diagnostics memory consumption for a concrete task
7331 */
7332 #if CONFIG_MEMORYSTATUS
7333 #if DEVELOPMENT || DEBUG
7334 kern_return_t
task_set_diag_footprint_limit(task_t task,uint64_t new_limit_mb,uint64_t * old_limit_mb)7335 task_set_diag_footprint_limit(
7336 task_t task,
7337 uint64_t new_limit_mb,
7338 uint64_t *old_limit_mb)
7339 {
7340 kern_return_t error;
7341
7342 if ((error = proc_check_footprint_priv())) {
7343 return KERN_NO_ACCESS;
7344 }
7345
7346 return task_set_diag_footprint_limit_internal(task, new_limit_mb, old_limit_mb);
7347 }
7348
7349 #endif // DEVELOPMENT || DEBUG
7350 #endif // CONFIG_MEMORYSTATUS
7351
7352 kern_return_t
task_convert_phys_footprint_limit(int limit_mb,int * converted_limit_mb)7353 task_convert_phys_footprint_limit(
7354 int limit_mb,
7355 int *converted_limit_mb)
7356 {
7357 if (limit_mb == -1) {
7358 /*
7359 * No limit
7360 */
7361 if (max_task_footprint != 0) {
7362 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
7363 } else {
7364 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
7365 }
7366 } else {
7367 /* nothing to convert */
7368 *converted_limit_mb = limit_mb;
7369 }
7370 return KERN_SUCCESS;
7371 }
7372
/*
 * task_set_phys_footprint_limit_internal:
 *
 * Set (or, with new_limit_mb == -1, remove) the task's physical footprint
 * ledger limit and record whether that limit is "active" and/or fatal.
 * The previous limit (in MB) is returned through old_limit_mb when
 * non-NULL.  On DEVELOPMENT/DEBUG kernels the diagnostics threshold is
 * disabled while it coincides with the limit and re-enabled otherwise.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal)
{
	ledger_amount_t old;
	kern_return_t ret;
#if DEVELOPMENT || DEBUG
	diagthreshold_check_return diag_threshold_validity;
#endif
	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}
	/**
	 * We may need to re-enable the diag threshold later; get its value
	 * and current status now.
	 */
#if DEVELOPMENT || DEBUG
	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_mb, false);
	/**
	 * If the footprint limit and the diagnostics threshold would be the
	 * same, disable the threshold; if they differ and the threshold was
	 * disabled, re-enable it.
	 */
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}
#endif

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

	if (old_limit_mb) {
		*old_limit_mb = (int)(old >> 20);
	}

	if (new_limit_mb == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
		    max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);

		task_lock(task);
		task_set_memlimit_is_active(task, memlimit_is_active);
		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
		task_unlock(task);
		/**
		 * If the diagnostics were disabled, and now we have a new limit, we have to re-enable it.
		 */
#if DEVELOPMENT || DEBUG
		if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
			ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
		} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
			ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
		}
#endif
		return KERN_SUCCESS;
	}

#ifdef CONFIG_NOMONITORS
	/* Resource monitors are compiled out: accept the call but change nothing. */
	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

	task_lock(task);

	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
		/*
		 * memlimit state is not changing
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	task_set_memlimit_is_active(task, memlimit_is_active);
	task_set_memlimit_is_fatal(task, memlimit_is_fatal);

	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

	/* Lowering the limit may put the current task over it immediately. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);
#if DEVELOPMENT || DEBUG
	if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}
#endif

	return KERN_SUCCESS;
}
7479
7480 #if RESETTABLE_DIAG_FOOTPRINT_LIMITS
/*
 * task_set_diag_footprint_limit_internal:
 *
 * Set (or, with new_limit_bytes == -1, remove) the task's diagnostics
 * memory threshold on the phys_footprint ledger.  The previous threshold
 * (in bytes) is returned through old_limit_bytes when non-NULL.
 */
kern_return_t
task_set_diag_footprint_limit_internal(
	task_t task,
	uint64_t new_limit_bytes,
	uint64_t *old_limit_bytes)
{
	ledger_amount_t old = 0;
	kern_return_t ret = KERN_SUCCESS;
	diagthreshold_check_return diag_threshold_validity;
	ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}
	/**
	 * We may need to re-enable the diag threshold later; get its value
	 * and current status now.
	 */
	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_bytes >> 20, true);
	/**
	 * If the footprint limit and the diagnostics threshold would be the
	 * same, disable the threshold.
	 */
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	if (old_limit_bytes) {
		*old_limit_bytes = old;
	}

	/* NOTE: uint64_t compared with -1; matches UINT64_MAX after conversion. */
	if (new_limit_bytes == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
		    LEDGER_LIMIT_INFINITY);
		/*
		 * If the memory diagnostics flag was disabled, lets enable it again
		 */
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
		return KERN_SUCCESS;
	}

#ifdef CONFIG_NOMONITORS
	/* Resource monitors are compiled out: accept the call but change nothing. */
	return KERN_SUCCESS;
#else

	task_lock(task);
	ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_bytes );
	/* Lowering the threshold may put the current task over it immediately. */
	if (task == current_task()) {
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}

	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */
}
7551
7552 kern_return_t
task_get_diag_footprint_limit_internal(task_t task,uint64_t * new_limit_bytes,bool * threshold_disabled)7553 task_get_diag_footprint_limit_internal(
7554 task_t task,
7555 uint64_t *new_limit_bytes,
7556 bool *threshold_disabled)
7557 {
7558 ledger_amount_t ledger_limit;
7559 kern_return_t ret = KERN_SUCCESS;
7560 if (new_limit_bytes == NULL || threshold_disabled == NULL) {
7561 return KERN_INVALID_ARGUMENT;
7562 }
7563 ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &ledger_limit);
7564 if (ledger_limit == LEDGER_LIMIT_INFINITY) {
7565 ledger_limit = -1;
7566 }
7567 if (ret == KERN_SUCCESS) {
7568 *new_limit_bytes = ledger_limit;
7569 ret = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, threshold_disabled);
7570 }
7571 return ret;
7572 }
7573 #endif /* RESETTABLE_DIAG_FOOTPRINT_LIMITS */
7574
7575
7576 kern_return_t
task_get_phys_footprint_limit(task_t task,int * limit_mb)7577 task_get_phys_footprint_limit(
7578 task_t task,
7579 int *limit_mb)
7580 {
7581 ledger_amount_t limit;
7582 kern_return_t ret;
7583
7584 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
7585 if (ret != KERN_SUCCESS) {
7586 return ret;
7587 }
7588
7589 /*
7590 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
7591 * result. There are, however, implicit assumptions that -1 mb limit
7592 * equates to LEDGER_LIMIT_INFINITY.
7593 */
7594 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
7595 *limit_mb = (int)(limit >> 20);
7596
7597 return KERN_SUCCESS;
7598 }
7599 #else /* CONFIG_MEMORYSTATUS */
/* CONFIG_MEMORYSTATUS is disabled: footprint limits are not supported. */
kern_return_t
task_set_phys_footprint_limit(
	__unused task_t task,
	__unused int new_limit_mb,
	__unused int *old_limit_mb)
{
	return KERN_FAILURE;
}
7608
/* CONFIG_MEMORYSTATUS is disabled: footprint limits are not supported. */
kern_return_t
task_get_phys_footprint_limit(
	__unused task_t task,
	__unused int *limit_mb)
{
	return KERN_FAILURE;
}
7616 #endif /* CONFIG_MEMORYSTATUS */
7617
7618 security_token_t *
task_get_sec_token(task_t task)7619 task_get_sec_token(task_t task)
7620 {
7621 return &task_get_ro(task)->task_tokens.sec_token;
7622 }
7623
/*
 * Replace the task's security token.  The token lives in read-only task
 * data, so the write must go through the zalloc RO update path.
 */
void
task_set_sec_token(task_t task, security_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.sec_token, token);
}
7630
7631 audit_token_t *
task_get_audit_token(task_t task)7632 task_get_audit_token(task_t task)
7633 {
7634 return &task_get_ro(task)->task_tokens.audit_token;
7635 }
7636
/*
 * Replace the task's audit token.  The token lives in read-only task
 * data, so the write must go through the zalloc RO update path.
 */
void
task_set_audit_token(task_t task, audit_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.audit_token, token);
}
7643
7644 void
task_set_tokens(task_t task,security_token_t * sec_token,audit_token_t * audit_token)7645 task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
7646 {
7647 struct task_token_ro_data tokens;
7648
7649 tokens = task_get_ro(task)->task_tokens;
7650 tokens.sec_token = *sec_token;
7651 tokens.audit_token = *audit_token;
7652
7653 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
7654 &tokens);
7655 }
7656
7657 boolean_t
task_is_privileged(task_t task)7658 task_is_privileged(task_t task)
7659 {
7660 return task_get_sec_token(task)->val[0] == 0;
7661 }
7662
7663 #ifdef CONFIG_MACF
/* Return the task's mach trap filter bitmask (from read-only proc_ro). */
uint8_t *
task_get_mach_trap_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_trap_filter_mask;
}
7669
/*
 * Install a new mach trap filter bitmask pointer. Note that the POINTER
 * itself is stored (hence &mask) in the RO-protected structure.
 */
void
task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_trap_filter_mask, &mask);
}
7676
/* Return the task's mach kobject filter bitmask (from read-only proc_ro). */
uint8_t *
task_get_mach_kobj_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_kobj_filter_mask;
}
7682
/* Return the address of the task's all_image_info (dyld info) structure. */
mach_vm_address_t
task_get_all_image_info_addr(task_t task)
{
	return task->all_image_info_addr;
}
7688
/*
 * Install a new mach kobject filter bitmask pointer; the pointer value
 * itself is written into the RO-protected structure (hence &mask).
 */
void
task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_kobj_filter_mask, &mask);
}
7695
7696 #endif /* CONFIG_MACF */
7697
7698 void
task_set_thread_limit(task_t task,uint16_t thread_limit)7699 task_set_thread_limit(task_t task, uint16_t thread_limit)
7700 {
7701 assert(task != kernel_task);
7702 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7703 task_lock(task);
7704 task->task_thread_limit = thread_limit;
7705 task_unlock(task);
7706 }
7707 }
7708
7709 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Apply soft/hard limits on the size of the task's IPC port table;
 * the actual policy lives in the ipc_space layer.
 */
kern_return_t
task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
{
	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
}
7715 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7716
7717 #if XNU_TARGET_OS_OSX
/*
 * Report whether the system-version compatibility shim is enabled for
 * this task. Returns the raw masked flag value (non-zero == enabled),
 * read under the task lock.
 */
boolean_t
task_has_system_version_compat_enabled(task_t task)
{
	boolean_t enabled = FALSE;

	task_lock(task);
	enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
	task_unlock(task);

	return enabled;
}
7729
/*
 * Enable or disable the system-version compatibility shim for the
 * calling task. Only a task may change its own setting, and never the
 * kernel task.
 */
void
task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
{
	assert(task == current_task());
	assert(task != kernel_task);

	task_lock(task);
	if (enable_system_version_compat) {
		task->t_flags |= TF_SYS_VERSION_COMPAT;
	} else {
		task->t_flags &= ~TF_SYS_VERSION_COMPAT;
	}
	task_unlock(task);
}
7744 #endif /* XNU_TARGET_OS_OSX */
7745
7746 /*
7747 * We need to export some functions to other components that
7748 * are currently implemented in macros within the osfmk
7749 * component. Just export them as functions of the same name.
7750 */
7751 boolean_t
is_kerneltask(task_t t)7752 is_kerneltask(task_t t)
7753 {
7754 if (t == kernel_task) {
7755 return TRUE;
7756 }
7757
7758 return FALSE;
7759 }
7760
/* Exported wrapper: is this task a forked corpse? */
boolean_t
is_corpsefork(task_t t)
{
	return task_is_a_corpse_fork(t);
}
7766
/*
 * Variant of current_task() that is safe very early in boot: before
 * STARTUP_SUB_EARLY_BOOT the current thread may not yet have its
 * read-only data (t_tro) set up, in which case TASK_NULL is returned
 * instead of dereferencing it.
 */
task_t
current_task_early(void)
{
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		if (current_thread()->t_tro == NULL) {
			return TASK_NULL;
		}
	}
	return get_threadtask(current_thread());
}
7777
/* Return the task owning the currently running thread. */
task_t
current_task(void)
{
	return get_threadtask(current_thread());
}
7783
7784 /* defined in bsd/kern/kern_prot.c */
7785 extern int get_audit_token_pid(audit_token_t *audit_token);
7786
7787 int
task_pid(task_t task)7788 task_pid(task_t task)
7789 {
7790 if (task) {
7791 return get_audit_token_pid(task_get_audit_token(task));
7792 }
7793 return -1;
7794 }
7795
7796 #if __has_feature(ptrauth_calls)
7797 /*
7798 * Get the shared region id and jop signing key for the task.
7799 * The function will allocate a kalloc buffer and return
7800 * it to caller, the caller needs to free it. This is used
7801 * for getting the information via task port.
7802 */
char *
task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
{
	size_t len;
	char *shared_region_id = NULL;

	task_lock(task);
	/* Nothing to copy if the task has no shared region id. */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		return NULL;
	}
	len = strlen(task->shared_region_id) + 1;

	/* don't hold task lock while allocating */
	task_unlock(task);
	shared_region_id = kalloc_data(len, Z_WAITOK);
	task_lock(task);

	/* Re-check: the id may have been cleared while we were unlocked. */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		kfree_data(shared_region_id, len);
		return NULL;
	}
	assert(len == strlen(task->shared_region_id) + 1); /* should never change */
	strlcpy(shared_region_id, task->shared_region_id, len);
	task_unlock(task);

	/* find key from its auth pager */
	if (jop_pid != NULL) {
		*jop_pid = shared_region_find_key(shared_region_id);
	}

	return shared_region_id;
}
7837
7838 /*
7839 * set the shared region id for a task
7840 */
/*
 * Set the shared region id for a task. Ownership of `id` transfers to
 * the task; the previous id (if any) is released and freed after the
 * lock is dropped.
 */
void
task_set_shared_region_id(task_t task, char *id)
{
	char *old_id;

	task_lock(task);
	old_id = task->shared_region_id;
	task->shared_region_id = id;
	/* A new id invalidates any previous auth-pager remapping. */
	task->shared_region_auth_remapped = FALSE;
	task_unlock(task);

	/* free any pre-existing shared region id */
	if (old_id != NULL) {
		shared_region_key_dealloc(old_id);
		kfree_data(old_id, strlen(old_id) + 1);
	}
}
7858 #endif /* __has_feature(ptrauth_calls) */
7859
7860 /*
7861 * This routine finds a thread in a task by its unique id
7862 * Returns a referenced thread or THREAD_NULL if the thread was not found
7863 *
7864 * TODO: This is super inefficient - it's an O(threads in task) list walk!
7865 * We should make a tid hash, or transition all tid clients to thread ports
7866 *
7867 * Precondition: No locks held (will take task lock)
7868 */
thread_t
task_findtid(task_t task, uint64_t tid)
{
	thread_t self = current_thread();
	thread_t found_thread = THREAD_NULL;
	thread_t iter_thread = THREAD_NULL;

	/* Short-circuit the lookup if we're looking up ourselves */
	if (tid == self->thread_id || tid == TID_NULL) {
		/* TID_NULL is an alias for "the calling thread". */
		assert(get_threadtask(self) == task);

		thread_reference(self);

		return self;
	}

	task_lock(task);

	/* O(threads-in-task) scan; see the TODO above about a tid hash. */
	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
		if (iter_thread->thread_id == tid) {
			found_thread = iter_thread;
			/* Take the reference while the task lock pins the thread list. */
			thread_reference(found_thread);
			break;
		}
	}

	task_unlock(task);

	return found_thread;
}
7899
7900 int
pid_from_task(task_t task)7901 pid_from_task(task_t task)
7902 {
7903 int pid = -1;
7904 void *bsd_info = get_bsdtask_info(task);
7905
7906 if (bsd_info) {
7907 pid = proc_pid(bsd_info);
7908 } else {
7909 pid = task_pid(task);
7910 }
7911
7912 return pid;
7913 }
7914
7915 /*
7916 * Control the CPU usage monitor for a task.
7917 */
7918 kern_return_t
task_cpu_usage_monitor_ctl(task_t task,uint32_t * flags)7919 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7920 {
7921 int error = KERN_SUCCESS;
7922
7923 if (*flags & CPUMON_MAKE_FATAL) {
7924 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7925 } else {
7926 error = KERN_INVALID_ARGUMENT;
7927 }
7928
7929 return error;
7930 }
7931
7932 /*
7933 * Control the wakeups monitor for a task.
7934 */
/*
 * Control the wakeups monitor for a task: query parameters
 * (WAKEMON_GET_PARAMS), enable with a rate in wakes/sec
 * (WAKEMON_ENABLE, optionally WAKEMON_SET_DEFAULTS /
 * WAKEMON_MAKE_FATAL), or disable (WAKEMON_DISABLE).
 */
kern_return_t
task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & WAKEMON_GET_PARAMS) {
		ledger_amount_t limit;
		uint64_t period;

		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);

		if (limit != LEDGER_LIMIT_INFINITY) {
			/*
			 * An active limit means the wakeups monitor is enabled.
			 */
			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
			*flags = WAKEMON_ENABLE;
			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
				*flags |= WAKEMON_MAKE_FATAL;
			}
		} else {
			/* Infinite limit == monitor disabled. */
			*flags = WAKEMON_DISABLE;
			*rate_hz = -1;
		}

		/*
		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	if (*flags & WAKEMON_ENABLE) {
		if (*flags & WAKEMON_SET_DEFAULTS) {
			*rate_hz = task_wakeups_monitor_rate;
		}

#ifndef CONFIG_NOMONITORS
		if (*flags & WAKEMON_MAKE_FATAL) {
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
		}
#endif /* CONFIG_NOMONITORS */

		/* Rejects non-positive rates (including bad SET_DEFAULTS values). */
		if (*rate_hz <= 0) {
			task_unlock(task);
			return KERN_INVALID_ARGUMENT;
		}

#ifndef CONFIG_NOMONITORS
		/* Limit = rate * interval wakes; refill period = interval seconds. */
		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
		    (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
#endif /* CONFIG_NOMONITORS */
	} else if (*flags & WAKEMON_DISABLE) {
		/*
		 * Caller wishes to disable wakeups monitor on the task.
		 *
		 * Disable telemetry if it was triggered by the wakeups monitor, and
		 * remove the limit & callback on the wakeups ledger entry.
		 */
#if CONFIG_TELEMETRY
		telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
#endif
		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
8008
/*
 * Ledger callback for the interrupt-wakeups entry. `warning` describes
 * which threshold was crossed; warning == 0 means the hard limit itself
 * was exceeded.
 */
void
task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
		 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
		 */
		telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
#endif

	if (warning == 0) {
		/* Hard limit exceeded: report the violation. */
		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
	}
}
8035
/* Boot-arg "enable_wakeup_reports": opt into resource-violation reports for excessive wakeups. */
TUNABLE(bool, enable_wakeup_reports, "enable_wakeup_reports", false); /* Enable wakeup reports. */
8037
8038 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)8039 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
8040 {
8041 task_t task = current_task();
8042 int pid = 0;
8043 const char *procname = "unknown";
8044 boolean_t fatal;
8045 kern_return_t kr;
8046 #ifdef EXC_RESOURCE_MONITORS
8047 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8048 #endif /* EXC_RESOURCE_MONITORS */
8049 struct ledger_entry_info lei;
8050
8051 #ifdef MACH_BSD
8052 pid = proc_selfpid();
8053 if (get_bsdtask_info(task) != NULL) {
8054 procname = proc_name_address(get_bsdtask_info(current_task()));
8055 }
8056 #endif
8057
8058 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
8059
8060 /*
8061 * Disable the exception notification so we don't overwhelm
8062 * the listener with an endless stream of redundant exceptions.
8063 * TODO: detect whether another thread is already reporting the violation.
8064 */
8065 uint32_t flags = WAKEMON_DISABLE;
8066 task_wakeups_monitor_ctl(task, &flags, NULL);
8067
8068 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
8069 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
8070 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
8071 "over ~%llu seconds, averaging %llu wakes / second and "
8072 "violating a %slimit of %llu wakes over %llu seconds.\n",
8073 procname, pid,
8074 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
8075 lei.lei_last_refill == 0 ? 0 :
8076 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
8077 fatal ? "FATAL " : "",
8078 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
8079
8080 if (enable_wakeup_reports) {
8081 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
8082 fatal ? kRNFatalLimitFlag : 0);
8083 if (kr) {
8084 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
8085 }
8086 }
8087
8088 #ifdef EXC_RESOURCE_MONITORS
8089 if (disable_exc_resource) {
8090 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8091 "suppressed by a boot-arg\n", procname, pid);
8092 return;
8093 }
8094 if (disable_exc_resource_during_audio && audio_active) {
8095 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8096 "suppressed due to audio playback\n", procname, pid);
8097 return;
8098 }
8099 if (lei.lei_last_refill == 0) {
8100 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8101 "suppressed due to lei.lei_last_refill = 0 \n", procname, pid);
8102 }
8103
8104 code[0] = code[1] = 0;
8105 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
8106 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
8107 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
8108 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
8109 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
8110 lei.lei_last_refill);
8111 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
8112 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
8113 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8114 #endif /* EXC_RESOURCE_MONITORS */
8115
8116 if (fatal) {
8117 task_terminate_internal(task);
8118 }
8119 }
8120
/*
 * Add io_delta to the global logical-write counter using a lock-free
 * compare-and-swap loop. When the accumulated count reaches
 * io_telemetry_limit the counter wraps to zero and the caller is told
 * to trigger I/O telemetry.
 */
static boolean_t
global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
{
	int64_t old_count, new_count;
	boolean_t needs_telemetry;

	do {
		/* Re-read the counter each iteration; CAS may have lost a race. */
		new_count = old_count = *global_write_count;
		new_count += io_delta;
		if (new_count >= io_telemetry_limit) {
			new_count = 0;
			needs_telemetry = TRUE;
		} else {
			needs_telemetry = FALSE;
		}
	} while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
	return needs_telemetry;
}
8139
/*
 * Account physical (filesystem metadata) writes against a task's
 * ledger and counters. No-op unless CONFIG_PHYS_WRITE_ACCT is built.
 */
void
task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
{
#if CONFIG_PHYS_WRITE_ACCT
	if (!io_size) {
		return;
	}

	/*
	 * task == NULL means that we have to update kernel_task ledgers
	 */
	if (!task) {
		task = kernel_task;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
	    task_pid(task), flavor, io_size, flags, 0);
	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);

	/* Credit and debit mirror each other on the same counter/ledger pair. */
	if (flags & TASK_BALANCE_CREDIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	} else if (flags & TASK_BALANCE_DEBIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	}
#endif /* CONFIG_PHYS_WRITE_ACCT */
}
8172
/*
 * Account a logical write (immediate/deferred/invalidated/metadata)
 * against the task's per-device counters, ledgers and coalition I/O
 * ledger, and update the global telemetry counter for internal-device
 * writes.
 */
void
task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
{
	int64_t io_delta = 0;
	int64_t * global_counter_to_update;
	boolean_t needs_telemetry = FALSE;
	boolean_t is_external_device = FALSE;
	int ledger_to_update = 0;
	struct task_writes_counters * writes_counters_to_update;

	if ((!task) || (!io_size) || (!vp)) {
		return;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
	    task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
	DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);

	// Is the drive backing this vnode internal or external to the system?
	if (vnode_isonexternalstorage(vp) == false) {
		global_counter_to_update = &global_logical_writes_count;
		ledger_to_update = task_ledgers.logical_writes;
		writes_counters_to_update = &task->task_writes_counters_internal;
		is_external_device = FALSE;
	} else {
		global_counter_to_update = &global_logical_writes_to_external_count;
		ledger_to_update = task_ledgers.logical_writes_to_external;
		writes_counters_to_update = &task->task_writes_counters_external;
		is_external_device = TRUE;
	}

	/* Invalidated writes are debited; every other flavor is credited. */
	switch (flags) {
	case TASK_WRITE_IMMEDIATE:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	case TASK_WRITE_DEFERRED:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	case TASK_WRITE_INVALIDATED:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
		ledger_debit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
		}
		break;
	case TASK_WRITE_METADATA:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	}

	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
	if (io_telemetry_limit != 0) {
		/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
		needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
		if (needs_telemetry && !is_external_device) {
			act_set_io_telemetry_ast(current_thread());
		}
	}
}
8244
8245 /*
8246 * Control the I/O monitor for a task.
8247 */
/*
 * Control the I/O monitor for a task: IOMON_ENABLE arms the
 * physical-writes ledger with the global limit/interval tunables;
 * IOMON_DISABLE removes the refill and callback.
 */
kern_return_t
task_io_monitor_ctl(task_t task, uint32_t *flags)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & IOMON_ENABLE) {
		/* Configure the physical I/O ledger */
		ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
		ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
	} else if (*flags & IOMON_DISABLE) {
		/*
		 * Caller wishes to disable I/O monitor on the task.
		 */
		ledger_disable_refill(ledger, task_ledgers.physical_writes);
		ledger_disable_callback(ledger, task_ledgers.physical_writes);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
8269
/*
 * Ledger callback for I/O monitors. warning == 0 means the limit was
 * exceeded (not merely a warning threshold).
 */
void
task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
{
	if (warning == 0) {
		/* param0 carries the I/O flavor as an integer smuggled in a pointer. */
		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
	}
}
8277
/*
 * Report that the current task exceeded its I/O monitor: disable the
 * monitor, log, send a resource-violation notification, and raise
 * EXC_RESOURCE when built in. Noinline so it stands out in backtraces.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
{
	int pid = 0;
	task_t task = current_task();
#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	/* Zero-initialized: stays all-zero for flavors not handled below. */
	struct ledger_entry_info lei = {};
	kern_return_t kr;

#ifdef MACH_BSD
	pid = proc_selfpid();
#endif
	/*
	 * Get the ledger entry info. We need to do this before disabling the exception
	 * to get correct values for all fields.
	 */
	switch (flavor) {
	case FLAVOR_IO_PHYSICAL_WRITES:
		ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
		break;
	}


	/*
	 * Disable the exception notification so we don't overwhelm
	 * the listener with an endless stream of redundant exceptions.
	 * TODO: detect whether another thread is already reporting the violation.
	 */
	uint32_t flags = IOMON_DISABLE;
	task_io_monitor_ctl(task, &flags);

	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
	}
	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
	    pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));

	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
	if (kr) {
		printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
	EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
#endif /* EXC_RESOURCE_MONITORS */
}
8332
8333 void
task_port_space_ast(__unused task_t task)8334 task_port_space_ast(__unused task_t task)
8335 {
8336 uint32_t current_size, soft_limit, hard_limit;
8337 assert(task == current_task());
8338 bool should_notify = ipc_space_check_table_size_limit(task->itk_space,
8339 ¤t_size, &soft_limit, &hard_limit);
8340 if (should_notify) {
8341 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
8342 }
8343 }
8344
8345 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Allocate a send right to a "task fatal" kobject port wrapping an
 * identity token for the current task. When the last send right dies,
 * the no-senders notification kills the task (see
 * task_fatal_port_no_senders). Returns MACH_PORT_NULL on failure.
 */
static mach_port_t
task_allocate_fatal_port(void)
{
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	task_id_token_t token;

	kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
	if (kr) {
		return MACH_PORT_NULL;
	}
	task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
	    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);

	task_id_token_set_port(token, task_fatal_port);

	return task_fatal_port;
}
8363
/*
 * No-senders notification for an IKOT_TASK_FATAL port: resolve the
 * identity token back to its task and kill it, then drop the token
 * reference the notification carried.
 */
static void
task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	task_t task = TASK_NULL;
	kern_return_t kr;

	task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);

	assert(token != NULL);
	if (token) {
		kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
		if (task) {
			task_bsdtask_kill(task);
			task_deallocate(task);
		}
		task_id_token_release(token); /* consumes ref given by notification */
	}
}
8382 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8383
/*
 * Report that a task allocated too many mach ports. With no configured
 * soft/hard limit the task hit the system-wide table cap and is killed
 * with an EXC_RESOURCE-style exit; otherwise (when
 * CONFIG_PROC_RESOURCE_LIMITS is built) a resource-violation
 * notification is sent, with a fatal port attached for hard-limit hits.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	__unused kern_return_t kr;
	__unused resource_notify_flags_t flags = kRNFlagsNone;
	__unused uint32_t limit;
	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];

	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}

	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
        Num of ports allocated %u; \n", procname, pid, current_size);

	/* Abort the process if it has hit the system-wide limit for ipc port table size */
	if (!hard_limit && !soft_limit) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
		EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);

		exit_with_port_space_exception(current_proc(), code[0], code[1]);

		return;
	}

#if CONFIG_PROC_RESOURCE_LIMITS
	if (hard_limit > 0) {
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		/* Fatal port: task dies when the receiver drops the send right. */
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
	}
	if (task_fatal_port) {
		ipc_port_release_send(task_fatal_port);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
8446
8447 #if CONFIG_PROC_RESOURCE_LIMITS
8448 void
task_kqworkloop_ast(task_t task,int current_size,int soft_limit,int hard_limit)8449 task_kqworkloop_ast(task_t task, int current_size, int soft_limit, int hard_limit)
8450 {
8451 assert(task == current_task());
8452 return SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task, current_size, soft_limit, hard_limit);
8453 }
8454
/*
 * Report that a task allocated too many kqworkloops: log, send a
 * resource-violation notification (hard-limit hits carry a fatal port
 * that kills the task when the receiver drops it). kernel_task and
 * launchd are exempt.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
#ifdef MACH_BSD
	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}
#endif
	if (pid == 0 || pid == 1) {
		return;
	}

	/* NOTE(review): %u paired with an int current_size — confirm against -Wformat. */
	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many kqworkloops. \
        Num of kqworkloops allocated %u; \n", procname, pid, current_size);

	int limit = 0;
	resource_notify_flags_t flags = kRNFlagsNone;
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	if (hard_limit) {
		flags |= kRNHardLimitFlag;
		limit = hard_limit;

		/* Fatal port: the task is killed when its last send right dies. */
		task_fatal_port = task_allocate_fatal_port();
		if (task_fatal_port == MACH_PORT_NULL) {
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kern_return_t kr;
	kr = send_resource_violation_with_fatal_port(send_kqworkloops_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(kqworkloops, ...): error %#x\n", kr);
	}
	if (task_fatal_port) {
		ipc_port_release_send(task_fatal_port);
	}
}
8499
8500
/*
 * AST handler invoked on the task's own thread when its file-descriptor
 * count crosses a configured limit; forwards to the notification path.
 */
void
task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
{
	assert(task == current_task());
	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
}
8507
/*
 * Report that a task allocated too many file descriptors: log and send
 * a resource-violation notification; hard-limit hits carry a fatal
 * port that kills the task when the receiver drops it. kernel_task and
 * launchd are exempt.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	kern_return_t kr;
	resource_notify_flags_t flags = kRNFlagsNone;
	int limit;
	mach_port_t task_fatal_port = MACH_PORT_NULL;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}
#endif
	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	/* NOTE(review): %u paired with an int current_size — confirm against -Wformat. */
	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
        Num of fds allocated %u; \n", procname, pid, current_size);

	if (hard_limit > 0) {
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		/* Fatal port: the task is killed when its last send right dies. */
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
	}
	if (task_fatal_port) {
		ipc_port_release_send(task_fatal_port);
	}
}
8556 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8557
8558 /* Placeholders for the task set/get voucher interfaces */
8559 kern_return_t
task_get_mach_voucher(task_t task,mach_voucher_selector_t __unused which,ipc_voucher_t * voucher)8560 task_get_mach_voucher(
8561 task_t task,
8562 mach_voucher_selector_t __unused which,
8563 ipc_voucher_t *voucher)
8564 {
8565 if (TASK_NULL == task) {
8566 return KERN_INVALID_TASK;
8567 }
8568
8569 *voucher = NULL;
8570 return KERN_SUCCESS;
8571 }
8572
8573 kern_return_t
task_set_mach_voucher(task_t task,ipc_voucher_t __unused voucher)8574 task_set_mach_voucher(
8575 task_t task,
8576 ipc_voucher_t __unused voucher)
8577 {
8578 if (TASK_NULL == task) {
8579 return KERN_INVALID_TASK;
8580 }
8581
8582 return KERN_SUCCESS;
8583 }
8584
kern_return_t
task_swap_mach_voucher(
	__unused task_t         task,
	__unused ipc_voucher_t  new_voucher,
	ipc_voucher_t           *in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	/* Swap is unimplemented; analyzer warning suppressed (rdar://81787115). */
	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
}
8600
8601 void
task_set_gpu_denied(task_t task,boolean_t denied)8602 task_set_gpu_denied(task_t task, boolean_t denied)
8603 {
8604 task_lock(task);
8605
8606 if (denied) {
8607 task->t_flags |= TF_GPU_DENIED;
8608 } else {
8609 task->t_flags &= ~TF_GPU_DENIED;
8610 }
8611
8612 task_unlock(task);
8613 }
8614
8615 boolean_t
task_is_gpu_denied(task_t task)8616 task_is_gpu_denied(task_t task)
8617 {
8618 /* We don't need the lock to read this flag */
8619 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
8620 }
8621
8622 /*
8623 * Task policy termination uses this path to clear the bit the final time
8624 * during the termination flow, and the TASK_POLICY_TERMINATED bit guarantees
8625 * that it won't be changed again on a terminated task.
8626 */
bool
task_set_game_mode_locked(task_t task, bool enabled)
{
	/*
	 * Set/clear TF_GAME_MODE on the task (caller holds the task lock) and
	 * keep the coalition's count of game-mode tasks in sync.
	 *
	 * Returns true when this change flipped the coalition's aggregate
	 * game-mode state (count transitioned 0->1 or 1->0) and the caller
	 * must propagate a thread-group update.
	 */
	task_lock_assert_owned(task);

	if (enabled) {
		/* terminated tasks may only ever clear the flag */
		assert(proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0);
	}

	bool previously_enabled = task_get_game_mode(task);
	bool needs_update = false;
	uint32_t new_count = 0;

	if (enabled) {
		task->t_flags |= TF_GAME_MODE;
	} else {
		task->t_flags &= ~TF_GAME_MODE;
	}

	/* only actual transitions adjust the coalition-wide counter */
	if (enabled && !previously_enabled) {
		/* first task entering game mode in the coalition */
		if (task_coalition_adjust_game_mode_count(task, 1, &new_count) && (new_count == 1)) {
			needs_update = true;
		}
	} else if (!enabled && previously_enabled) {
		/* last task leaving game mode in the coalition */
		if (task_coalition_adjust_game_mode_count(task, -1, &new_count) && (new_count == 0)) {
			needs_update = true;
		}
	}

	return needs_update;
}
8658
void
task_set_game_mode(task_t task, bool enabled)
{
	/*
	 * Public entry point for toggling game mode: takes the task lock,
	 * applies the change (unless the task policy is already TERMINATED),
	 * and then — outside the lock — propagates the coalition thread-group
	 * update if the aggregate state flipped.
	 */
	bool needs_update = false;

	task_lock(task);

	/* After termination, further updates are no longer effective */
	if (proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0) {
		needs_update = task_set_game_mode_locked(task, enabled);
	}

	task_unlock(task);

#if CONFIG_THREAD_GROUPS
	/* done unlocked: thread-group update may take other locks */
	if (needs_update) {
		task_coalition_thread_group_game_mode_update(task);
	}
#endif /* CONFIG_THREAD_GROUPS */
}
8679
8680 bool
task_get_game_mode(task_t task)8681 task_get_game_mode(task_t task)
8682 {
8683 /* We don't need the lock to read this flag */
8684 return task->t_flags & TF_GAME_MODE;
8685 }
8686
8687
8688 uint64_t
get_task_memory_region_count(task_t task)8689 get_task_memory_region_count(task_t task)
8690 {
8691 vm_map_t map;
8692 map = (task == kernel_task) ? kernel_map: task->map;
8693 return (uint64_t)get_map_nentries(map);
8694 }
8695
static void
kdebug_trace_dyld_internal(uint32_t base_code,
    struct dyld_kernel_image_info *info)
{
	/*
	 * Emit one dyld image-info record as a pair (LP64) or triple (ILP32)
	 * of kdebug events. The 16-byte UUID plus load address, fsid and
	 * fsobjid are packed into the events' argument slots; consecutive
	 * base_code offsets distinguish the pieces so a decoder can
	 * reassemble the record.
	 */
	static_assert(sizeof(info->uuid) >= 16);

#if defined(__LP64__)
	/* two 64-bit words carry the whole UUID */
	uint64_t *uuid = (uint64_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
	    uuid[1], info->load_addr,
	    (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
	    0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
	    (uint64_t)info->fsobjid.fid_objno |
	    ((uint64_t)info->fsobjid.fid_generation << 32),
	    0, 0, 0, 0);
#else /* defined(__LP64__) */
	/* 32-bit slots: the UUID needs four words and the rest spills into
	 * two more events (base_code + 3 and + 4) */
	uint32_t *uuid = (uint32_t *)&(info->uuid);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
	    uuid[1], uuid[2], uuid[3], 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
	    (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
	    info->fsobjid.fid_objno, 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
	    info->fsobjid.fid_generation, 0, 0, 0, 0);
#endif /* !defined(__LP64__) */
}
8730
static kern_return_t
kdebug_trace_dyld(task_t task, uint32_t base_code,
    vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
{
	/*
	 * Emit kdebug events for an array of dyld image-info records passed
	 * from userspace as a vm_map_copy_t.
	 *
	 * Ownership: infos_copy is consumed on the paths that return
	 * KERN_SUCCESS (discarded when tracing is off, or destroyed by
	 * vm_map_copyout). NOTE(review): on the error returns the copy is
	 * presumably destroyed by the MIG server routine — confirm against
	 * the generated stubs.
	 */
	kern_return_t kr;
	dyld_kernel_image_info_array_t infos;
	vm_map_offset_t map_data;
	vm_offset_t data;

	if (!infos_copy) {
		return KERN_INVALID_ADDRESS;
	}

	/* Fast path: these events aren't being traced — just drop the data. */
	if (!kdebug_enable ||
	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
		vm_map_copy_discard(infos_copy);
		return KERN_SUCCESS;
	}

	/* Only the calling task may trace its own dyld activity. */
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	/* Materialize the caller-provided records in the kernel IPC map. */
	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);

	for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
		kdebug_trace_dyld_internal(base_code, &(infos[i]));
	}

	/* Unmap the copied-out records now that the events are emitted. */
	data = CAST_DOWN(vm_offset_t, map_data);
	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
	return KERN_SUCCESS;
}
8769
kern_return_t
task_register_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	/* MIG entry point: trace newly-mapped dyld images (map event codes). */
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8778
kern_return_t
task_unregister_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	/* MIG entry point: trace unmapped dyld images (unmap event codes). */
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8787
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
	/* Retained only for MIG interface completeness; never implemented. */
	return KERN_NOT_SUPPORTED;
}
8795
8796 kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,dyld_kernel_image_info_t cache_img,__unused boolean_t no_cache,__unused boolean_t private_cache)8797 task_register_dyld_shared_cache_image_info(task_t task,
8798 dyld_kernel_image_info_t cache_img,
8799 __unused boolean_t no_cache,
8800 __unused boolean_t private_cache)
8801 {
8802 if (task == NULL || task != current_task()) {
8803 return KERN_INVALID_TASK;
8804 }
8805
8806 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
8807 return KERN_SUCCESS;
8808 }
8809
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
	/* Retained only for MIG interface completeness; never implemented. */
	return KERN_NOT_SUPPORTED;
}
8816
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
	/* Retained only for MIG interface completeness; never implemented. */
	return KERN_NOT_SUPPORTED;
}
8823
kern_return_t
task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
    task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
{
	/*
	 * Flavored inspection interface. *size_in_out carries the caller's
	 * buffer capacity (in natural_t units) on entry and, on success, is
	 * updated to the amount actually filled. Only available when the
	 * hardware CPI counters are configured.
	 */
#if CONFIG_PERVASIVE_CPI
	task_t task = (task_t)task_insp;
	kern_return_t kr = KERN_SUCCESS;
	mach_msg_type_number_t size;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	size = *size_in_out;

	switch (flavor) {
	case TASK_INSPECT_BASIC_COUNTS: {
		struct task_inspect_basic_counts *bc =
		    (struct task_inspect_basic_counts *)info_out;
		struct recount_usage stats = { 0 };
		/* caller's buffer must be large enough for the flavor */
		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

		/* sum lifetime usage across the task's recount tracks */
		recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
		bc->instructions = recount_usage_instructions(&stats);
		bc->cycles = recount_usage_cycles(&stats);
		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
		break;
	}
	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	/* report the filled size only on success */
	if (kr == KERN_SUCCESS) {
		*size_in_out = size;
	}
	return kr;
#else /* CONFIG_PERVASIVE_CPI */
#pragma unused(task_insp, flavor, info_out, size_in_out)
	return KERN_NOT_SUPPORTED;
#endif /* !CONFIG_PERVASIVE_CPI */
}
8869
8870 #if CONFIG_SECLUDED_MEMORY
8871 int num_tasks_can_use_secluded_mem = 0;
8872
8873 void
task_set_can_use_secluded_mem(task_t task,boolean_t can_use_secluded_mem)8874 task_set_can_use_secluded_mem(
8875 task_t task,
8876 boolean_t can_use_secluded_mem)
8877 {
8878 if (!task->task_could_use_secluded_mem) {
8879 return;
8880 }
8881 task_lock(task);
8882 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
8883 task_unlock(task);
8884 }
8885
void
task_set_can_use_secluded_mem_locked(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	/*
	 * Update task_can_use_secluded_mem (task lock held) and keep the
	 * global num_tasks_can_use_secluded_mem counter in sync. Only true
	 * transitions adjust the counter, and enabling additionally requires
	 * the secluded_for_apps boot-arg.
	 */
	assert(task->task_could_use_secluded_mem);
	if (can_use_secluded_mem &&
	    secluded_for_apps &&                /* global boot-arg */
	    !task->task_can_use_secluded_mem) {
		/* off -> on: count this task */
		assert(num_tasks_can_use_secluded_mem >= 0);
		OSAddAtomic(+1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = TRUE;
	} else if (!can_use_secluded_mem &&
	    task->task_can_use_secluded_mem) {
		/* on -> off: uncount this task */
		assert(num_tasks_can_use_secluded_mem > 0);
		OSAddAtomic(-1,
		    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
		task->task_can_use_secluded_mem = FALSE;
	}
}
8907
8908 void
task_set_could_use_secluded_mem(task_t task,boolean_t could_use_secluded_mem)8909 task_set_could_use_secluded_mem(
8910 task_t task,
8911 boolean_t could_use_secluded_mem)
8912 {
8913 task->task_could_use_secluded_mem = !!could_use_secluded_mem;
8914 }
8915
8916 void
task_set_could_also_use_secluded_mem(task_t task,boolean_t could_also_use_secluded_mem)8917 task_set_could_also_use_secluded_mem(
8918 task_t task,
8919 boolean_t could_also_use_secluded_mem)
8920 {
8921 task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
8922 }
8923
boolean_t
task_can_use_secluded_mem(
	task_t          task,
	boolean_t       is_alloc)
{
	/*
	 * Decide whether this task may satisfy an allocation from the
	 * secluded pool. Three ways in: the task itself is enabled, the task
	 * is "could also" eligible while any task is enabled, or (allocation
	 * path only) the task is a large-memory task near its limit.
	 */
	if (task->task_can_use_secluded_mem) {
		assert(task->task_could_use_secluded_mem);
		assert(num_tasks_can_use_secluded_mem > 0);
		return TRUE;
	}
	/* piggy-back eligibility: allowed while at least one task is enabled */
	if (task->task_could_also_use_secluded_mem &&
	    num_tasks_can_use_secluded_mem > 0) {
		assert(num_tasks_can_use_secluded_mem > 0);
		return TRUE;
	}

	/*
	 * If a single task is using more than some large amount of
	 * memory (i.e. secluded_shutoff_trigger) and is approaching
	 * its task limit, allow it to dip into secluded and begin
	 * suppression of rebuilding secluded memory until that task exits.
	 */
	if (is_alloc && secluded_shutoff_trigger != 0) {
		uint64_t phys_used = get_task_phys_footprint(task);
		uint64_t limit = get_task_phys_footprint_limit(task);
		if (phys_used > secluded_shutoff_trigger &&
		    limit > secluded_shutoff_trigger &&
		    phys_used > limit - secluded_shutoff_headroom) {
			start_secluded_suppression(task);
			return TRUE;
		}
	}

	return FALSE;
}
8959
8960 boolean_t
task_could_use_secluded_mem(task_t task)8961 task_could_use_secluded_mem(
8962 task_t task)
8963 {
8964 return task->task_could_use_secluded_mem;
8965 }
8966
8967 boolean_t
task_could_also_use_secluded_mem(task_t task)8968 task_could_also_use_secluded_mem(
8969 task_t task)
8970 {
8971 return task->task_could_also_use_secluded_mem;
8972 }
8973 #endif /* CONFIG_SECLUDED_MEMORY */
8974
queue_head_t *
task_io_user_clients(task_t task)
{
	/* Return the head of this task's IOKit user-client queue. */
	return &task->io_user_clients;
}
8980
void
task_set_message_app_suspended(task_t task, boolean_t enable)
{
	/* Set the per-task message_app_suspended flag.
	 * NOTE(review): presumably gates delivery of app-suspended
	 * notifications for this task — confirm with the flag's consumers. */
	task->message_app_suspended = enable;
}
8986
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
	/* Fields that must survive exec: carry the virtual timer state from
	 * the old task into its replacement. */
	dst_task->vtimers = src_task->vtimers;
}
8992
8993 #if DEVELOPMENT || DEBUG
8994 int vm_region_footprint = 0;
8995 #endif /* DEVELOPMENT || DEBUG */
8996
8997 boolean_t
task_self_region_footprint(void)8998 task_self_region_footprint(void)
8999 {
9000 #if DEVELOPMENT || DEBUG
9001 if (vm_region_footprint) {
9002 /* system-wide override */
9003 return TRUE;
9004 }
9005 #endif /* DEVELOPMENT || DEBUG */
9006 return current_task()->task_region_footprint;
9007 }
9008
9009 void
task_self_region_footprint_set(boolean_t newval)9010 task_self_region_footprint_set(
9011 boolean_t newval)
9012 {
9013 task_t curtask;
9014
9015 curtask = current_task();
9016 task_lock(curtask);
9017 if (newval) {
9018 curtask->task_region_footprint = TRUE;
9019 } else {
9020 curtask->task_region_footprint = FALSE;
9021 }
9022 task_unlock(curtask);
9023 }
9024
9025 void
task_set_darkwake_mode(task_t task,boolean_t set_mode)9026 task_set_darkwake_mode(task_t task, boolean_t set_mode)
9027 {
9028 assert(task);
9029
9030 task_lock(task);
9031
9032 if (set_mode) {
9033 task->t_flags |= TF_DARKWAKE_MODE;
9034 } else {
9035 task->t_flags &= ~(TF_DARKWAKE_MODE);
9036 }
9037
9038 task_unlock(task);
9039 }
9040
9041 boolean_t
task_get_darkwake_mode(task_t task)9042 task_get_darkwake_mode(task_t task)
9043 {
9044 assert(task);
9045 return (task->t_flags & TF_DARKWAKE_MODE) != 0;
9046 }
9047
9048 /*
9049 * Set default behavior for task's control port and EXC_GUARD variants that have
9050 * settable behavior.
9051 *
9052 * Platform binaries typically have one behavior, third parties another -
9053 * but there are special exception we may need to account for.
9054 */
void
task_set_exc_guard_ctrl_port_default(
	task_t task,
	thread_t main_thread,
	const char *name,
	unsigned int namelen,
	boolean_t is_simulated,
	uint32_t platform,
	uint32_t sdk)
{
	/*
	 * Choose the task's default EXC_GUARD behavior and control-port
	 * options at spawn time. Hardened (platform) binaries and third-party
	 * binaries take different defaults; simulator binaries and launchd
	 * get special-cased. Finally, immovable/pinned port policy is applied
	 * to the task and its main thread.
	 */
	task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;

	if (task_is_hardened_binary(task)) {
		/* set exc guard default behavior for hardened binaries */
		task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);

		if (1 == task_pid(task)) {
			/* special flags for inittask - deliver every instance as corpse */
			task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
		} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
			/* honor by-name default setting overrides */

			int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);

			for (int i = 0; i < count; i++) {
				const struct task_exc_guard_named_default *named_default =
				    &task_exc_guard_named_defaults[i];
				/* exact-length name match against the override table */
				if (strncmp(named_default->name, name, namelen) == 0 &&
				    strlen(named_default->name) == namelen) {
					task->task_exc_guard = named_default->behavior;
					break;
				}
			}
		}

		/* set control port options for 1p code, inherited from parent task by default */
		opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
	} else {
		/* set exc guard default behavior for third-party code */
		task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
		/* set control port options for 3p code, inherited from parent task by default */
		opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
	}

	if (is_simulated) {
		/* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
		if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
			task->task_exc_guard = TASK_EXC_GUARD_NONE;
		}
		/* Disable protection for control ports for simulated binaries */
		opts = TASK_CONTROL_PORT_OPTIONS_NONE;
	}


	task_set_control_port_options(task, opts);

	task_set_immovable_pinned(task);
	main_thread_set_immovable_pinned(main_thread);
}
9116
9117 kern_return_t
task_get_exc_guard_behavior(task_t task,task_exc_guard_behavior_t * behaviorp)9118 task_get_exc_guard_behavior(
9119 task_t task,
9120 task_exc_guard_behavior_t *behaviorp)
9121 {
9122 if (task == TASK_NULL) {
9123 return KERN_INVALID_TASK;
9124 }
9125 *behaviorp = task->task_exc_guard;
9126 return KERN_SUCCESS;
9127 }
9128
kern_return_t
task_set_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t new_behavior)
{
	/*
	 * Change a task's EXC_GUARD behavior. The request is masked down to
	 * what this configuration permits; on RELEASE kernels the change is
	 * additionally applied atomically and may only strengthen protection
	 * (bits in the no-unset mask can't be cleared, bits in the no-set
	 * mask can't be added).
	 */
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
		return KERN_INVALID_VALUE;
	}

	/* limit setting to that allowed for this config */
	new_behavior = new_behavior & task_exc_guard_config_mask;

#if !defined (DEBUG) && !defined (DEVELOPMENT)
	/* On release kernels, only allow _upgrading_ exc guard behavior */
	task_exc_guard_behavior_t cur_behavior;

	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
		/* may not clear any currently-set protected bit */
		if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* may not set any currently-clear restricted bit */
		if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* no restrictions on CORPSE bit */
	});
#else
	task->task_exc_guard = new_behavior;
#endif
	return KERN_SUCCESS;
}
9164
9165 kern_return_t
task_set_corpse_forking_behavior(task_t task,task_corpse_forking_behavior_t behavior)9166 task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
9167 {
9168 #if DEVELOPMENT || DEBUG
9169 if (task == TASK_NULL) {
9170 return KERN_INVALID_TASK;
9171 }
9172
9173 task_lock(task);
9174 if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
9175 task->t_flags |= TF_NO_CORPSE_FORKING;
9176 } else {
9177 task->t_flags &= ~TF_NO_CORPSE_FORKING;
9178 }
9179 task_unlock(task);
9180
9181 return KERN_SUCCESS;
9182 #else
9183 (void)task;
9184 (void)behavior;
9185 return KERN_NOT_SUPPORTED;
9186 #endif
9187 }
9188
9189 boolean_t
task_corpse_forking_disabled(task_t task)9190 task_corpse_forking_disabled(task_t task)
9191 {
9192 boolean_t disabled = FALSE;
9193
9194 task_lock(task);
9195 disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
9196 task_unlock(task);
9197
9198 return disabled;
9199 }
9200
9201 #if __arm64__
9202 extern int legacy_footprint_entitlement_mode;
9203 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
9204 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
9205
9206
9207 void
task_set_legacy_footprint(task_t task)9208 task_set_legacy_footprint(
9209 task_t task)
9210 {
9211 task_lock(task);
9212 task->task_legacy_footprint = TRUE;
9213 task_unlock(task);
9214 }
9215
void
task_set_extra_footprint_limit(
	task_t task)
{
	/*
	 * One-shot: grant the task the extra (legacy-entitlement) footprint
	 * limit. Double-checked under the task lock so concurrent callers
	 * apply the memorystatus action exactly once; the action itself runs
	 * after dropping the lock.
	 */
	if (task->task_extra_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check: another thread may have won the race before we locked */
	if (task->task_extra_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_extra_footprint_limit = TRUE;
	task_unlock(task);
	memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
}
9232
void
task_set_ios13extended_footprint_limit(
	task_t task)
{
	/*
	 * One-shot: grant the iOS 13 extended footprint limit. Same
	 * double-checked-locking shape as task_set_extra_footprint_limit so
	 * the memorystatus action fires exactly once, outside the lock.
	 */
	if (task->task_ios13extended_footprint_limit) {
		return;
	}
	task_lock(task);
	/* re-check: another thread may have won the race before we locked */
	if (task->task_ios13extended_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_ios13extended_footprint_limit = TRUE;
	task_unlock(task);
	memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
}
9249 #endif /* __arm64__ */
9250
9251 static inline ledger_amount_t
task_ledger_get_balance(ledger_t ledger,int ledger_idx)9252 task_ledger_get_balance(
9253 ledger_t ledger,
9254 int ledger_idx)
9255 {
9256 ledger_amount_t amount;
9257 amount = 0;
9258 ledger_get_balance(ledger, ledger_idx, &amount);
9259 return amount;
9260 }
9261
9262 /*
9263 * Gather the amount of memory counted in a task's footprint due to
9264 * being in a specific set of ledgers.
9265 */
9266 void
task_ledgers_footprint(ledger_t ledger,ledger_amount_t * ledger_resident,ledger_amount_t * ledger_compressed)9267 task_ledgers_footprint(
9268 ledger_t ledger,
9269 ledger_amount_t *ledger_resident,
9270 ledger_amount_t *ledger_compressed)
9271 {
9272 *ledger_resident = 0;
9273 *ledger_compressed = 0;
9274
9275 /* purgeable non-volatile memory */
9276 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
9277 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
9278
9279 /* "default" tagged memory */
9280 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
9281 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
9282
9283 /* "network" currently never counts in the footprint... */
9284
9285 /* "media" tagged memory */
9286 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
9287 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
9288
9289 /* "graphics" tagged memory */
9290 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
9291 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
9292
9293 /* "neural" tagged memory */
9294 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
9295 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
9296 }
9297
9298 #if CONFIG_MEMORYSTATUS
9299 /*
9300 * Credit any outstanding task dirty time to the ledger.
9301 * memstat_dirty_start is pushed forward to prevent any possibility of double
9302 * counting, making it safe to call this as often as necessary to ensure that
9303 * anyone reading the ledger gets up-to-date information.
9304 */
9305 void
task_ledger_settle_dirty_time(task_t t)9306 task_ledger_settle_dirty_time(task_t t)
9307 {
9308 task_lock(t);
9309
9310 uint64_t start = t->memstat_dirty_start;
9311 if (start) {
9312 uint64_t now = mach_absolute_time();
9313
9314 uint64_t duration;
9315 absolutetime_to_nanoseconds(now - start, &duration);
9316
9317 ledger_t ledger = get_task_ledger(t);
9318 ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);
9319
9320 t->memstat_dirty_start = now;
9321 }
9322
9323 task_unlock(t);
9324 }
9325 #endif /* CONFIG_MEMORYSTATUS */
9326
9327 void
task_set_memory_ownership_transfer(task_t task,boolean_t value)9328 task_set_memory_ownership_transfer(
9329 task_t task,
9330 boolean_t value)
9331 {
9332 task_lock(task);
9333 task->task_can_transfer_memory_ownership = !!value;
9334 task_unlock(task);
9335 }
9336
9337 #if DEVELOPMENT || DEBUG
9338
9339 void
task_set_no_footprint_for_debug(task_t task,boolean_t value)9340 task_set_no_footprint_for_debug(task_t task, boolean_t value)
9341 {
9342 task_lock(task);
9343 task->task_no_footprint_for_debug = !!value;
9344 task_unlock(task);
9345 }
9346
int
task_get_no_footprint_for_debug(task_t task)
{
	/* Lock-free read of the debug-only footprint-exclusion flag. */
	return task->task_no_footprint_for_debug;
}
9352
9353 #endif /* DEVELOPMENT || DEBUG */
9354
void
task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
{
	/*
	 * Snapshot the task's owned VM objects.
	 *
	 * query != NULL: fill `query` (capacity `len` bytes) with one
	 * vm_object_query_data_t per owned object, stopping when the buffer
	 * is full; *num receives the number of entries written.
	 * query == NULL: count-only mode; *num receives the number of owned
	 * objects without copying anything.
	 */
	vm_object_t find_vmo;
	size_t size = 0;

	/*
	 * Allocate a save area for FP state before taking task_objq lock,
	 * if necessary, to ensure that VM_KERNEL_ADDRHASH() doesn't cause
	 * an FP state allocation while holding VM locks.
	 */
	ml_fp_save_area_prealloc();

	task_objq_lock(task);
	if (query != NULL) {
		queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
		{
			vm_object_query_t p = &query[size++];

			/* make sure to not overrun */
			if (size * sizeof(vm_object_query_data_t) > len) {
				--size;
				break;
			}

			bzero(p, sizeof(*p));
			/* hashed address, not the raw kernel pointer */
			p->object_id = (vm_object_id_t) VM_KERNEL_ADDRHASH(find_vmo);
			p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
			p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
			p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
			p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
			p->vo_no_footprint = find_vmo->vo_no_footprint;
			p->vo_ledger_tag = find_vmo->vo_ledger_tag;
			p->purgable = find_vmo->purgable;

			/* compressed pages only exist once a pager has been created */
			if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
				p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
			} else {
				p->compressed_size = 0;
			}
		}
	} else {
		/* count-only query */
		size = (size_t)task->task_owned_objects;
	}
	task_objq_unlock(task);

	*num = size;
}
9403
9404 void
task_get_owned_vmobjects(task_t task,size_t buffer_size,vmobject_list_output_t buffer,size_t * output_size,size_t * entries)9405 task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
9406 {
9407 assert(output_size);
9408 assert(entries);
9409
9410 /* copy the vmobjects and vmobject data out of the task */
9411 if (buffer_size == 0) {
9412 task_copy_vmobjects(task, NULL, 0, entries);
9413 *output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
9414 } else {
9415 assert(buffer);
9416 task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
9417 buffer->entries = (uint64_t)*entries;
9418 *output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
9419 }
9420 }
9421
9422 void
task_store_owned_vmobject_info(task_t to_task,task_t from_task)9423 task_store_owned_vmobject_info(task_t to_task, task_t from_task)
9424 {
9425 size_t buffer_size;
9426 vmobject_list_output_t buffer;
9427 size_t output_size;
9428 size_t entries;
9429
9430 assert(to_task != from_task);
9431
9432 /* get the size, allocate a bufferr, and populate */
9433 entries = 0;
9434 output_size = 0;
9435 task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);
9436
9437 if (output_size) {
9438 buffer_size = output_size;
9439 buffer = kalloc_data(buffer_size, Z_WAITOK);
9440
9441 if (buffer) {
9442 entries = 0;
9443 output_size = 0;
9444
9445 task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);
9446
9447 if (entries) {
9448 to_task->corpse_vmobject_list = buffer;
9449 to_task->corpse_vmobject_list_size = buffer_size;
9450 }
9451 }
9452 }
9453 }
9454
9455 void
task_set_filter_msg_flag(task_t task,boolean_t flag)9456 task_set_filter_msg_flag(
9457 task_t task,
9458 boolean_t flag)
9459 {
9460 assert(task != TASK_NULL);
9461
9462 if (flag) {
9463 task_ro_flags_set(task, TFRO_FILTER_MSG);
9464 } else {
9465 task_ro_flags_clear(task, TFRO_FILTER_MSG);
9466 }
9467 }
9468
9469 boolean_t
task_get_filter_msg_flag(task_t task)9470 task_get_filter_msg_flag(
9471 task_t task)
9472 {
9473 if (!task) {
9474 return false;
9475 }
9476
9477 return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
9478 }
9479 bool
task_is_exotic(task_t task)9480 task_is_exotic(
9481 task_t task)
9482 {
9483 if (task == TASK_NULL) {
9484 return false;
9485 }
9486 return vm_map_is_exotic(get_task_map(task));
9487 }
9488
9489 bool
task_is_alien(task_t task)9490 task_is_alien(
9491 task_t task)
9492 {
9493 if (task == TASK_NULL) {
9494 return false;
9495 }
9496 return vm_map_is_alien(get_task_map(task));
9497 }
9498
9499
9500
9501 #if CONFIG_MACF
9502 /* Set the filter mask for Mach traps. */
void
mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
{
	/* MAC hook: install the Mach trap filter bitmask for this task. */
	assert(task);

	task_set_mach_trap_filter_mask(task, maskptr);
}
9510
9511 /* Set the filter mask for kobject msgs. */
void
mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
{
	/* MAC hook: install the kobject-message filter bitmask for this task. */
	assert(task);

	task_set_mach_kobj_filter_mask(task, maskptr);
}
9519
9520 /* Hook for mach trap/sc filter evaluation policy. */
9521 SECURITY_READ_ONLY_LATE(mac_task_mach_filter_cbfunc_t) mac_task_mach_trap_evaluate = NULL;
9522
9523 /* Hook for kobj message filter evaluation policy. */
9524 SECURITY_READ_ONLY_LATE(mac_task_kobj_filter_cbfunc_t) mac_task_kobj_msg_evaluate = NULL;
9525
9526 /* Set the callback hooks for the filtering policy. */
int
mac_task_register_filter_callbacks(
	const mac_task_mach_filter_cbfunc_t mach_cbfunc,
	const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
{
	/*
	 * Register the filtering policy's evaluation hooks. Each hook may be
	 * installed at most once; attempting to re-register fails. Note the
	 * hooks are handled sequentially: if the mach hook registers but the
	 * kobj hook is already taken, the mach registration stays in effect
	 * even though KERN_FAILURE is returned.
	 */
	if (mach_cbfunc != NULL) {
		if (mac_task_mach_trap_evaluate != NULL) {
			return KERN_FAILURE;
		}
		mac_task_mach_trap_evaluate = mach_cbfunc;
	}
	if (kobj_cbfunc != NULL) {
		if (mac_task_kobj_msg_evaluate != NULL) {
			return KERN_FAILURE;
		}
		mac_task_kobj_msg_evaluate = kobj_cbfunc;
	}

	return KERN_SUCCESS;
}
9547 #endif /* CONFIG_MACF */
9548
9549 #if CONFIG_ROSETTA
9550 bool
task_is_translated(task_t task)9551 task_is_translated(task_t task)
9552 {
9553 extern boolean_t proc_is_translated(struct proc* p);
9554 return task && proc_is_translated(get_bsdtask_info(task));
9555 }
9556 #endif
9557
9558
9559
9560 #if __has_feature(ptrauth_calls)
9561 /* On FPAC, we want to deliver all PAC violations as fatal exceptions, regardless
9562 * of the enable_pac_exception boot-arg value or any other entitlements.
9563 * The only case where we allow non-fatal PAC exceptions on FPAC is for debugging,
9564 * which requires Developer Mode enabled.
9565 *
9566 * On non-FPAC hardware, we gate the decision behind entitlements and the
9567 * enable_pac_exception boot-arg.
9568 */
9569 extern int gARM_FEAT_FPAC;
9570 /*
9571 * Having the PAC_EXCEPTION_ENTITLEMENT entitlement means we always enforce all
9572 * of the PAC exception hardening: fatal exceptions and signed user state.
9573 */
9574 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
9575 /*
9576 * On non-FPAC hardware, when enable_pac_exception boot-arg is set to true,
9577 * processes can choose to get non-fatal PAC exception delivery by setting
9578 * the SKIP_PAC_EXCEPTION_ENTITLEMENT entitlement.
9579 */
9580 #define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
9581
/*
 * Decide, at task setup, whether PAC violations are delivered as fatal
 * exceptions and whether user thread state must be signed; records the
 * result in the task's read-only flags.
 */
void
task_set_pac_exception_fatal_flag(
	task_t task)
{
	assert(task != TASK_NULL);
	bool pac_hardened_task = false;
	uint32_t set_flags = 0;

	/*
	 * We must not apply this security policy on tasks which have opted out of mach hardening to
	 * avoid regressions in third party plugins and third party apps when using AMFI boot-args
	 */
	bool platform_binary = task_get_platform_binary(task);
#if XNU_TARGET_OS_OSX
	platform_binary &= !task_opted_out_mach_hardening(task);
#endif /* XNU_TARGET_OS_OSX */

	/*
	 * On non-FPAC hardware, we allow gating PAC exceptions behind
	 * SKIP_PAC_EXCEPTION_ENTITLEMENT and the boot-arg.
	 */
	if (!gARM_FEAT_FPAC && enable_pac_exception &&
	    IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
		return;
	}

	/* Explicitly entitled or hardened-runtime tasks always enforce signed user state. */
	if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT) || task_get_hardened_runtime(task)) {
		pac_hardened_task = true;
		set_flags |= TFRO_PAC_ENFORCE_USER_STATE;
	}

	/* On non-FPAC hardware, gate the fatal property behind entitlements and boot-arg. */
	if (pac_hardened_task ||
	    ((enable_pac_exception || gARM_FEAT_FPAC) && platform_binary)) {
		/* If debugging is configured, do not make PAC exception fatal. */
		if (address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
			set_flags |= TFRO_PAC_EXC_FATAL;
		}
	}

	if (set_flags != 0) {
		task_ro_flags_set(task, set_flags);
	}
}
9626
9627 bool
task_is_pac_exception_fatal(task_t task)9628 task_is_pac_exception_fatal(
9629 task_t task)
9630 {
9631 assert(task != TASK_NULL);
9632 return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
9633 }
9634 #endif /* __has_feature(ptrauth_calls) */
9635
9636 /*
9637 * FATAL_EXCEPTION_ENTITLEMENT, if present, will contain a list of
9638 * conditions for which access violations should deliver SIGKILL rather than
9639 * SIGSEGV. This is a hardening measure intended for use by applications
9640 * that are able to handle the stricter error handling behavior. Currently
9641 * this supports FATAL_EXCEPTION_ENTITLEMENT_JIT, which is documented in
9642 * user_fault_in_self_restrict_mode().
9643 */
9644 #define FATAL_EXCEPTION_ENTITLEMENT "com.apple.security.fatal-exceptions"
9645 #define FATAL_EXCEPTION_ENTITLEMENT_JIT "jit"
9646
9647 void
task_set_jit_exception_fatal_flag(task_t task)9648 task_set_jit_exception_fatal_flag(
9649 task_t task)
9650 {
9651 assert(task != TASK_NULL);
9652 if (IOTaskHasStringEntitlement(task, FATAL_EXCEPTION_ENTITLEMENT, FATAL_EXCEPTION_ENTITLEMENT_JIT) &&
9653 address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
9654 task_ro_flags_set(task, TFRO_JIT_EXC_FATAL);
9655 }
9656 }
9657
9658 bool
task_is_jit_exception_fatal(__unused task_t task)9659 task_is_jit_exception_fatal(
9660 __unused task_t task)
9661 {
9662 #if !defined(XNU_PLATFORM_MacOSX)
9663 return true;
9664 #else
9665 assert(task != TASK_NULL);
9666 return !!(task_ro_flags_get(task) & TFRO_JIT_EXC_FATAL);
9667 #endif
9668 }
9669
9670 bool
task_needs_user_signed_thread_state(task_t task)9671 task_needs_user_signed_thread_state(
9672 task_t task)
9673 {
9674 assert(task != TASK_NULL);
9675 return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
9676 }
9677
/*
 * Enable the TECS mitigation for a task and apply it to all of its
 * existing threads. Passing TASK_NULL targets the current task.
 */
void
task_set_tecs(task_t task)
{
	if (task == TASK_NULL) {
		task = current_task();
	}

	/* Nothing to do if the hardware does not require the CPUVN_CI mitigation. */
	if (!machine_csv(CPUVN_CI)) {
		return;
	}

	/* We take the task lock below; it must not already be held by the caller. */
	LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);

	task_lock(task);

	/* Record the setting on the task so new threads pick it up. */
	task->t_flags |= TF_TECS;

	/* Propagate to every thread that already exists in the task. */
	thread_t thread;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		machine_tecs(thread);
	}
	task_unlock(task);
}
9701
/*
 * Test-only (DEVELOPMENT || DEBUG): perform a synchronous kernel upcall
 * on the given send port. The send right is consumed on success.
 */
kern_return_t
task_test_sync_upcall(
	task_t task,
	ipc_port_t send_port)
{
#if DEVELOPMENT || DEBUG
	/* Only the calling task may exercise this path, and the port must be valid. */
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Block on sync kernel upcall on the given send port */
	mach_test_sync_upcall(send_port);

	/* Consume the send right that was passed in. */
	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	return KERN_NOT_SUPPORTED;
#endif
}
9723
/*
 * Test-only (DEVELOPMENT || DEBUG): verify that QoS and IO tier attributes
 * propagate across an asynchronous kernel upcall on the given send port.
 * The send right is consumed on success.
 */
kern_return_t
task_test_async_upcall_propagation(
	task_t task,
	ipc_port_t send_port,
	int qos,
	int iotier)
{
#if DEVELOPMENT || DEBUG
	kern_return_t kr;

	/* Only the calling task may exercise this path, and the port must be valid. */
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Both attributes must be within their valid ranges. */
	if (qos < THREAD_QOS_DEFAULT || qos > THREAD_QOS_USER_INTERACTIVE ||
	    iotier < THROTTLE_LEVEL_START || iotier > THROTTLE_LEVEL_END) {
		return KERN_INVALID_ARGUMENT;
	}

	struct thread_attr_for_ipc_propagation attr = {
		.tafip_iotier = iotier,
		.tafip_qos = qos
	};

	/* Apply propagate attr to port */
	kr = ipc_port_propagate_thread_attr(send_port, attr);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* Temporarily allow importance to be sent from this thread. */
	thread_enable_send_importance(current_thread(), TRUE);

	/* Perform an async kernel upcall on the given send port */
	mach_test_async_upcall(send_port);
	thread_enable_send_importance(current_thread(), FALSE);

	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	(void)qos;
	(void)iotier;
	return KERN_NOT_SUPPORTED;
#endif
}
9770
9771 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Allocate a fatal port for the current task and copy a send right into the
 * caller's IPC space. Returns the resulting port name, or 0 (MACH_PORT_NULL
 * name) if the port could not be allocated.
 */
mach_port_name_t
current_task_get_fatal_port_name(void)
{
	mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_port_name_t port_name = 0;

	task_fatal_port = task_allocate_fatal_port();

	/* Copy the send right out to userspace, yielding a name in current_space(). */
	if (task_fatal_port) {
		ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
		    IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
	}

	return port_name;
}
9787 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
9788
9789 #if defined(__x86_64__)
9790 bool
curtask_get_insn_copy_optout(void)9791 curtask_get_insn_copy_optout(void)
9792 {
9793 bool optout;
9794 task_t cur_task = current_task();
9795
9796 task_lock(cur_task);
9797 optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9798 task_unlock(cur_task);
9799
9800 return optout;
9801 }
9802
/*
 * Opt the current task out of instruction copying and apply the setting
 * to all of its existing threads.
 */
void
curtask_set_insn_copy_optout(void)
{
	task_t cur_task = current_task();

	task_lock(cur_task);

	/* Record the opt-out on the task so future threads inherit it. */
	cur_task->t_flags |= TF_INSN_COPY_OPTOUT;

	/* Propagate the opt-out to every thread that already exists. */
	thread_t thread;
	queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
		machine_thread_set_insn_copy_optout(thread);
	}
	task_unlock(cur_task);
}
9818 #endif /* defined(__x86_64__) */
9819
9820 void
task_get_corpse_vmobject_list(task_t task,vmobject_list_output_t * list,size_t * list_size)9821 task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9822 {
9823 assert(task);
9824 assert(list_size);
9825
9826 *list = task->corpse_vmobject_list;
9827 *list_size = (size_t)task->corpse_vmobject_list_size;
9828 }
9829
/*
 * Panic helper: a task's read-only proc structure does not point back at
 * the task, indicating corruption of the proc_ro back-reference.
 */
__abortlike
static void
panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
{
	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
	    "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
}
9837
/*
 * Return the task's read-only proc structure, validating both that it is
 * a genuine proc_ro zone allocation and that it points back at this task.
 * Panics on any inconsistency.
 */
proc_ro_t
task_get_ro(task_t t)
{
	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;

	/* Verify ro is a real allocation from the read-only proc_ro zone. */
	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	/* The read-only structure must reference this task. */
	if (__improbable(proc_ro_task(ro) != t)) {
		panic_proc_ro_task_backref_mismatch(t, ro);
	}

	return ro;
}
9850
9851 uint32_t
task_ro_flags_get(task_t task)9852 task_ro_flags_get(task_t task)
9853 {
9854 return task_get_ro(task)->t_flags_ro;
9855 }
9856
/*
 * Atomically OR flags into the task's read-only flag word.
 * Read-only memory can only be modified through the zalloc_ro helpers.
 */
void
task_ro_flags_set(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_OR_32, flags);
}
9863
/*
 * Atomically clear flags in the task's read-only flag word
 * (AND with the complement, via the zalloc_ro helpers).
 */
void
task_ro_flags_clear(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
}
9870
9871 task_control_port_options_t
task_get_control_port_options(task_t task)9872 task_get_control_port_options(task_t task)
9873 {
9874 return task_get_ro(task)->task_control_port_options;
9875 }
9876
/*
 * Store the task's control-port options. The field lives in read-only
 * memory, so the update goes through the zalloc_ro helper.
 */
void
task_set_control_port_options(task_t task, task_control_port_options_t opts)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_control_port_options, &opts);
}
9883
9884 /*!
9885 * @function kdp_task_is_locked
9886 *
9887 * @abstract
9888 * Checks if task is locked.
9889 *
9890 * @discussion
9891 * NOT SAFE: To be used only by kernel debugger.
9892 *
9893 * @param task task to check
9894 *
9895 * @returns TRUE if the task is locked.
9896 */
boolean_t
kdp_task_is_locked(task_t task)
{
	/* Debugger-only: inspects the mutex state without acquiring it. */
	return kdp_lck_mtx_lock_spin_is_acquired(&task->lock);
}
9902
9903 #if DEBUG || DEVELOPMENT
9904 /**
9905 *
9906 * Check if a threshold limit is valid based on the actual phys memory
9907 * limit. If they are same, race conditions may arise, so we have to prevent
9908 * it to happen.
9909 */
static diagthreshold_check_return
task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value)
{
	int phys_limit_mb;
	kern_return_t ret_value;
	bool threshold_enabled;
	bool dummy;

	/* Find out whether the diagnostics threshold is currently armed. */
	ret_value = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, &threshold_enabled);
	if (ret_value != KERN_SUCCESS) {
		return ret_value;
	}
	/*
	 * Fetch the "other" limit to compare against: when validating a
	 * diagnostics threshold, compare with the physical footprint limit;
	 * otherwise compare with the diagnostics limit (converted to MB).
	 */
	if (is_diagnostics_value == true) {
		ret_value = task_get_phys_footprint_limit(task, &phys_limit_mb);
	} else {
		uint64_t diag_limit;
		ret_value = task_get_diag_footprint_limit_internal(task, &diag_limit, &dummy);
		phys_limit_mb = (int)(diag_limit >> 20);
	}
	if (ret_value != KERN_SUCCESS) {
		return ret_value;
	}
	/* Equal threshold and limit is reported so callers can avoid the race. */
	if (phys_limit_mb == (int) new_limit) {
		if (threshold_enabled == false) {
			return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED;
		} else {
			return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED;
		}
	}
	if (threshold_enabled == false) {
		return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED;
	} else {
		return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED;
	}
}
9944 #endif
9945
9946 #if CONFIG_EXCLAVES
/*
 * Attach the conclave named by task_conclave_id to the given task.
 * vnode/off identify the task's executable for entitlement evaluation.
 * The caller must hold the conclave-spawn privilege, and the target
 * executable must be entitled to host (or spawn) conclaves.
 */
kern_return_t
task_add_conclave(task_t task, void *vnode, int64_t off, const char *task_conclave_id)
{
	/*
	 * Only launchd or properly entitled tasks can attach tasks to
	 * conclaves.
	 */
	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}

	/*
	 * Only entitled tasks can have conclaves attached.
	 * Allow tasks which have the SPAWN privilege to also host conclaves.
	 * This allows xpc proxy to add a conclave before execing a daemon.
	 */
	if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST) &&
	    !exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}

	/*
	 * Make this EXCLAVES_BOOT_STAGE_2 until userspace is actually
	 * triggering the EXCLAVESKIT boot stage.
	 */
	kern_return_t kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_2);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	return exclaves_conclave_attach(EXCLAVES_DOMAIN_KERNEL, task_conclave_id, task);
}
9979
/*
 * Launch the conclave attached to the current task. On success the task
 * is marked conclave-tainted.
 */
kern_return_t
task_launch_conclave(mach_port_name_t port __unused)
{
	kern_return_t kr = KERN_FAILURE;
	assert3u(port, ==, MACH_PORT_NULL);

	/* The calling task must already have a conclave attached. */
	exclaves_resource_t *conclave = task_get_conclave(current_task());
	if (conclave == NULL) {
		return kr;
	}

	kr = exclaves_conclave_launch(conclave);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* Launching counts as interacting with the conclave. */
	task_set_conclave_taint(current_task());

	return KERN_SUCCESS;
}
9998
/*
 * Move the conclave attachment from old_task to new_task across exec.
 * vnode/off identify new_task's executable for entitlement evaluation.
 * Succeeds trivially when old_task has no attached conclave.
 */
kern_return_t
task_inherit_conclave(task_t old_task, task_t new_task, void *vnode, int64_t off)
{
	/* Nothing to inherit if the old task has no attached conclave. */
	if (old_task->conclave == NULL ||
	    !exclaves_conclave_is_attached(old_task->conclave)) {
		return KERN_SUCCESS;
	}

	/*
	 * Only launchd or properly entitled tasks can attach tasks to
	 * conclaves.
	 */
	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}

	/*
	 * Only entitled tasks can have conclaves attached.
	 */
	if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST)) {
		return KERN_DENIED;
	}

	return exclaves_conclave_inherit(old_task->conclave, old_task, new_task);
}
10024
/*
 * Release the task's conclave state during teardown: free any stashed
 * crash-info buffer and detach the conclave if one is attached.
 */
void
task_clear_conclave(task_t task)
{
	/* Free the conclave crash-info buffer, if one was captured. */
	if (task->exclave_crash_info) {
		kfree_data(task->exclave_crash_info, CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE);
		task->exclave_crash_info = NULL;
	}

	if (task->conclave == NULL) {
		return;
	}

	/*
	 * XXX
	 * This should only fail if either the conclave is in an unexpected
	 * state (i.e. not ATTACHED) or if the wrong port is supplied.
	 * We should re-visit this and make sure we guarantee the above
	 * constraints.
	 */
	__assert_only kern_return_t ret =
	    exclaves_conclave_detach(task->conclave, task);
	assert3u(ret, ==, KERN_SUCCESS);
}
10048
/*
 * Stop the task's conclave as part of task termination. Panics if a
 * conclave-tainted task exits while the taint policy is enforced.
 * gather_crash_bt requests that conclave backtraces be collected.
 */
void
task_stop_conclave(task_t task, bool gather_crash_bt)
{
	thread_t thread = current_thread();

	if (task->conclave == NULL) {
		return;
	}

	if (task_should_panic_on_exit_due_to_conclave_taint(task)) {
		panic("Conclave tainted task %p terminated\n", task);
	}

	/* Stash the task on current thread for conclave teardown */
	thread->conclave_stop_task = task;

	__assert_only kern_return_t ret =
	    exclaves_conclave_stop(task->conclave, gather_crash_bt);

	/* Teardown complete; drop the stashed reference. */
	thread->conclave_stop_task = TASK_NULL;

	assert3u(ret, ==, KERN_SUCCESS);
}
10072
10073 kern_return_t
task_stop_conclave_upcall(void)10074 task_stop_conclave_upcall(void)
10075 {
10076 task_t task = current_task();
10077 if (task->conclave == NULL) {
10078 return KERN_INVALID_TASK;
10079 }
10080
10081 return exclaves_conclave_stop_upcall(task->conclave);
10082 }
10083
10084 kern_return_t
task_stop_conclave_upcall_complete(void)10085 task_stop_conclave_upcall_complete(void)
10086 {
10087 task_t task = current_task();
10088 thread_t thread = current_thread();
10089
10090 if (!(thread->th_exclaves_state & TH_EXCLAVES_STOP_UPCALL_PENDING)) {
10091 return KERN_SUCCESS;
10092 }
10093
10094 assert3p(task->conclave, !=, NULL);
10095
10096 return exclaves_conclave_stop_upcall_complete(task->conclave, task);
10097 }
10098
10099 kern_return_t
task_suspend_conclave_upcall(uint64_t * scid_list,size_t scid_list_count)10100 task_suspend_conclave_upcall(uint64_t *scid_list, size_t scid_list_count)
10101 {
10102 task_t task = current_task();
10103 thread_t thread;
10104 int scid_count = 0;
10105 kern_return_t kr;
10106 if (task->conclave == NULL) {
10107 return KERN_INVALID_TASK;
10108 }
10109
10110 kr = task_hold_and_wait(task);
10111
10112 task_lock(task);
10113 queue_iterate(&task->threads, thread, thread_t, task_threads)
10114 {
10115 if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
10116 scid_list[scid_count++] = thread->th_exclaves_scheduling_context_id;
10117 if (scid_count >= scid_list_count) {
10118 break;
10119 }
10120 }
10121 }
10122
10123 task_unlock(task);
10124 return kr;
10125 }
10126
/*
 * Upcall delivering conclave crash-info pages for 'task'. The shared
 * physical pages are copied into a kernel buffer which is handed to the
 * task (if it is still active) for later crash-report processing.
 */
kern_return_t
task_crash_info_conclave_upcall(task_t task, const xnuupcalls_conclavesharedbuffer_s *shared_buf,
    uint32_t length)
{
	if (task->conclave == NULL) {
		return KERN_INVALID_TASK;
	}

	/* Allocate the buffer and memcpy it */
	int task_crash_info_buffer_size = 0;
	uint8_t * task_crash_info_buffer;

	if (!length) {
		printf("Conclave upcall: task_crash_info_conclave_upcall did not return any page addresses\n");
		return KERN_INVALID_ARGUMENT;
	}

	/* The fixed-size buffer must be able to hold the reported length. */
	task_crash_info_buffer_size = CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE;
	assert3u(task_crash_info_buffer_size, >=, length);

	task_crash_info_buffer = kalloc_data(task_crash_info_buffer_size, Z_WAITOK);
	if (!task_crash_info_buffer) {
		panic("task_crash_info_conclave_upcall: cannot allocate buffer for task_info shared memory");
		return KERN_INVALID_ARGUMENT;
	}

	/* Copy page-by-page out of the conclave's shared physical pages. */
	uint8_t * dst = task_crash_info_buffer;
	uint32_t remaining = length;
	for (size_t i = 0; i < CONCLAVE_CRASH_BUFFER_PAGECOUNT; i++) {
		if (remaining) {
			memcpy(dst, (uint8_t*)phystokv((pmap_paddr_t)shared_buf->physaddr[i]), PAGE_SIZE);
			remaining = (remaining >= PAGE_SIZE) ? remaining - PAGE_SIZE : 0;
			dst += PAGE_SIZE;
		}
	}

	/* Transfer ownership to the task, unless it already has a buffer or is exiting. */
	task_lock(task);
	if (task->exclave_crash_info == NULL && task->active) {
		task->exclave_crash_info = task_crash_info_buffer;
		task->exclave_crash_info_length = length;
		task_crash_info_buffer = NULL;
	}
	task_unlock(task);

	/* Ownership was not transferred above; free the local copy. */
	if (task_crash_info_buffer) {
		kfree_data(task_crash_info_buffer, task_crash_info_buffer_size);
	}

	return KERN_SUCCESS;
}
10177
/* Return the conclave resource attached to this task (may be NULL). */
exclaves_resource_t *
task_get_conclave(task_t task)
{
	return task->conclave;
}
10183
extern boolean_t IOPMRootDomainGetWillShutdown(void);

/*
 * When true (the default), processes are not tainted by talking to a
 * conclave, so the system does not panic when such a process exits.
 */
TUNABLE(bool, disable_conclave_taint, "disable_conclave_taint", true);
10187
10188 static bool
task_should_panic_on_exit_due_to_conclave_taint(task_t task)10189 task_should_panic_on_exit_due_to_conclave_taint(task_t task)
10190 {
10191 /* Check if boot-arg to disable conclave taint is set */
10192 if (disable_conclave_taint) {
10193 return false;
10194 }
10195
10196 /* Check if the system is shutting down */
10197 if (IOPMRootDomainGetWillShutdown()) {
10198 return false;
10199 }
10200
10201 return task_is_conclave_tainted(task);
10202 }
10203
10204 static bool
task_is_conclave_tainted(task_t task)10205 task_is_conclave_tainted(task_t task)
10206 {
10207 return (task->t_exclave_state & TES_CONCLAVE_TAINTED) != 0 &&
10208 !(task->t_exclave_state & TES_CONCLAVE_UNTAINTABLE);
10209 }
10210
/* Mark the task as having interacted with its conclave. */
static void
task_set_conclave_taint(task_t task)
{
	os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_TAINTED, relaxed);
}
10216
/* Exempt the task from conclave tainting. */
void
task_set_conclave_untaintable(task_t task)
{
	os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_UNTAINTABLE, relaxed);
}
10222
/*
 * Unmarshal the task's stashed conclave crash info and append it to the
 * kcdata crash report (crash_info_ptr) inside an EXCLAVES container.
 * No-op if the task has no conclave or no captured crash info.
 */
void
task_add_conclave_crash_info(task_t task, void *crash_info_ptr)
{
	__block kern_return_t error = KERN_SUCCESS;
	tb_error_t tberr = TB_ERROR_SUCCESS;
	void *crash_info;
	uint32_t crash_info_length = 0;

	if (task->conclave == NULL) {
		return;
	}

	if (task->exclave_crash_info_length == 0) {
		return;
	}

	/* Open the exclaves container in the report before emitting data. */
	error = kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
	if (error != KERN_SUCCESS) {
		return;
	}

	crash_info = task->exclave_crash_info;
	crash_info_length = task->exclave_crash_info_length;

	/* Unmarshal the stackshot result; the block processes it into kcdata. */
	tberr = stackshot_stackshotresult__unmarshal(crash_info,
	    (uint64_t)crash_info_length, ^(stackshot_stackshotresult_s result){
		error = stackshot_exclaves_process_stackshot(&result, crash_info_ptr);
		if (error != KERN_SUCCESS) {
		        printf("stackshot_exclaves_process_result: error processing stackshot result %d\n", error);
		}
	});
	if (tberr != TB_ERROR_SUCCESS) {
		printf("task_conclave_crash: task_add_conclave_crash_info could not unmarshal stackshot data 0x%x\n", tberr);
		error = KERN_FAILURE;
		goto error_exit;
	}

error_exit:
	/* Always close the container, even on unmarshal failure. */
	kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_END,
	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);

	return;
}
10267
10268 #endif /* CONFIG_EXCLAVES */
10269
10270 #pragma mark task utils
10271
10272 /* defined in bsd/kern/kern_proc.c */
10273 extern void proc_name(int pid, char *buf, int size);
10274 extern char *proc_best_name(struct proc *p);
10275
/* Copy the BSD process name for this task's pid into buf (at most size bytes). */
void
task_procname(task_t task, char *buf, int size)
{
	proc_name(task_pid(task), buf, size);
}
10281
10282 void
task_best_name(task_t task,char * buf,size_t size)10283 task_best_name(task_t task, char *buf, size_t size)
10284 {
10285 char *name = proc_best_name(task_get_proc_raw(task));
10286 strlcpy(buf, name, size);
10287 }
10288