1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to [email protected] any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100 #include <mach/mach_test_upcall.h>
101
102 #include <ipc/ipc_importance.h>
103 #include <ipc/ipc_types.h>
104 #include <ipc/ipc_space.h>
105 #include <ipc/ipc_entry.h>
106 #include <ipc/ipc_hash.h>
107 #include <ipc/ipc_init.h>
108
109 #include <kern/kern_types.h>
110 #include <kern/mach_param.h>
111 #include <kern/misc_protos.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/coalition.h>
115 #include <kern/zalloc.h>
116 #include <kern/kalloc.h>
117 #include <kern/kern_cdata.h>
118 #include <kern/processor.h>
119 #include <kern/recount.h>
120 #include <kern/sched_prim.h> /* for thread_wakeup */
121 #include <kern/ipc_tt.h>
122 #include <kern/host.h>
123 #include <kern/clock.h>
124 #include <kern/timer.h>
125 #include <kern/assert.h>
126 #include <kern/affinity.h>
127 #include <kern/exc_resource.h>
128 #include <kern/machine.h>
129 #include <kern/policy_internal.h>
130 #include <kern/restartable.h>
131 #include <kern/ipc_kobject.h>
132
133 #include <corpses/task_corpse.h>
134 #if CONFIG_TELEMETRY
135 #include <kern/telemetry.h>
136 #endif
137
138 #if CONFIG_PERVASIVE_CPI
139 #include <kern/monotonic.h>
140 #include <machine/monotonic.h>
141 #endif /* CONFIG_PERVASIVE_CPI */
142
143 #include <os/log.h>
144
145 #include <vm/pmap.h>
146 #include <vm/vm_map.h>
147 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
148 #include <vm/vm_pageout.h>
149 #include <vm/vm_protos.h>
150 #include <vm/vm_purgeable_internal.h>
151 #include <vm/vm_compressor_pager.h>
152 #include <vm/vm_reclaim_internal.h>
153
154 #include <sys/proc_ro.h>
155 #include <sys/resource.h>
156 #include <sys/signalvar.h> /* for coredump */
157 #include <sys/bsdtask_info.h>
158 #include <sys/kdebug_triage.h>
159 /*
160 * Exported interfaces
161 */
162
163 #include <mach/task_server.h>
164 #include <mach/mach_host_server.h>
165 #include <mach/mach_port_server.h>
166
167 #include <vm/vm_shared_region.h>
168
169 #include <libkern/OSDebug.h>
170 #include <libkern/OSAtomic.h>
171 #include <libkern/section_keywords.h>
172
173 #include <mach-o/loader.h>
174 #include <kdp/kdp_dyld.h>
175
176 #include <kern/sfi.h> /* picks up ledger.h */
177
178 #if CONFIG_MACF
179 #include <security/mac_mach_internal.h>
180 #endif
181
182 #include <IOKit/IOBSD.h>
183 #include <kdp/processor_core.h>
184
185 #include <string.h>
186
187 #if KPERF
188 extern int kpc_force_all_ctrs(task_t, int);
189 #endif
190
191 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
192
193 int64_t next_taskuniqueid = 0;
194 const size_t task_alignment = _Alignof(struct task);
195 extern const size_t proc_alignment;
196 extern size_t proc_struct_size;
197 extern size_t proc_and_task_size;
198 size_t task_struct_size;
199
200 extern uint32_t ipc_control_port_options;
201
202 extern int large_corpse_count;
203
204 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
205 extern void task_disown_frozen_csegs(task_t owner_task);
206
207 static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
208 static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
209 static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
210 static inline void task_zone_init(void);
211
212
213 IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
214 IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
215 .iko_op_no_senders = task_port_no_senders);
216 IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
217 .iko_op_no_senders = task_port_with_flavor_no_senders);
218 IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
219 .iko_op_no_senders = task_port_with_flavor_no_senders);
220 IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
221 .iko_op_no_senders = task_suspension_no_senders);
222
223 #if CONFIG_PROC_RESOURCE_LIMITS
224 static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
225 static mach_port_t task_allocate_fatal_port(void);
226
227 IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
228 .iko_op_stable = true,
229 .iko_op_no_senders = task_fatal_port_no_senders);
230
231 extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
232 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
233
234 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
235 int audio_active = 0;
236
237 /*
238 * structure for tracking zone usage
239 * Used either one per task/thread for all zones or <per-task,per-zone>.
240 */
241 typedef struct zinfo_usage_store_t {
242 /* These fields may be updated atomically, and so must be 8 byte aligned */
243 uint64_t alloc __attribute__((aligned(8))); /* allocation counter */
244 uint64_t free __attribute__((aligned(8))); /* free counter */
245 } zinfo_usage_store_t;
246
247 /**
248 * Return codes related to diag threshold and memory limit
249 */
250 __options_decl(diagthreshold_check_return, int, {
251 THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED = 0,
252 THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED = 1,
253 THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED = 2,
254 THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED = 3,
255 });
256
257 /**
258 * Return codes related to diag threshold and memory limit
259 */
260 __options_decl(current_, int, {
261 THRESHOLD_IS_SAME_AS_LIMIT = 0,
262 THRESHOLD_IS_NOT_SAME_AS_LIMIT = 1
263 });
264
265 zinfo_usage_store_t tasks_tkm_private;
266 zinfo_usage_store_t tasks_tkm_shared;
267
268 /* A container to accumulate statistics for expired tasks */
269 expired_task_statistics_t dead_task_statistics;
270 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
271
272 ledger_template_t task_ledger_template = NULL;
273
274 /* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
275 LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
276 LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
277
278 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
279 {.cpu_time = -1,
280 .tkm_private = -1,
281 .tkm_shared = -1,
282 .phys_mem = -1,
283 .wired_mem = -1,
284 .internal = -1,
285 .iokit_mapped = -1,
286 .external = -1,
287 .reusable = -1,
288 .alternate_accounting = -1,
289 .alternate_accounting_compressed = -1,
290 .page_table = -1,
291 .phys_footprint = -1,
292 .internal_compressed = -1,
293 .purgeable_volatile = -1,
294 .purgeable_nonvolatile = -1,
295 .purgeable_volatile_compressed = -1,
296 .purgeable_nonvolatile_compressed = -1,
297 .tagged_nofootprint = -1,
298 .tagged_footprint = -1,
299 .tagged_nofootprint_compressed = -1,
300 .tagged_footprint_compressed = -1,
301 .network_volatile = -1,
302 .network_nonvolatile = -1,
303 .network_volatile_compressed = -1,
304 .network_nonvolatile_compressed = -1,
305 .media_nofootprint = -1,
306 .media_footprint = -1,
307 .media_nofootprint_compressed = -1,
308 .media_footprint_compressed = -1,
309 .graphics_nofootprint = -1,
310 .graphics_footprint = -1,
311 .graphics_nofootprint_compressed = -1,
312 .graphics_footprint_compressed = -1,
313 .neural_nofootprint = -1,
314 .neural_footprint = -1,
315 .neural_nofootprint_compressed = -1,
316 .neural_footprint_compressed = -1,
317 .platform_idle_wakeups = -1,
318 .interrupt_wakeups = -1,
319 #if CONFIG_SCHED_SFI
320 .sfi_wait_times = { 0 /* initialized at runtime */},
321 #endif /* CONFIG_SCHED_SFI */
322 .cpu_time_billed_to_me = -1,
323 .cpu_time_billed_to_others = -1,
324 .physical_writes = -1,
325 .logical_writes = -1,
326 .logical_writes_to_external = -1,
327 #if DEBUG || DEVELOPMENT
328 .pages_grabbed = -1,
329 .pages_grabbed_kern = -1,
330 .pages_grabbed_iopl = -1,
331 .pages_grabbed_upl = -1,
332 #endif
333 #if CONFIG_FREEZE
334 .frozen_to_swap = -1,
335 #endif /* CONFIG_FREEZE */
336 .energy_billed_to_me = -1,
337 .energy_billed_to_others = -1,
338 #if CONFIG_PHYS_WRITE_ACCT
339 .fs_metadata_writes = -1,
340 #endif /* CONFIG_PHYS_WRITE_ACCT */
341 #if CONFIG_MEMORYSTATUS
342 .memorystatus_dirty_time = -1,
343 #endif /* CONFIG_MEMORYSTATUS */
344 .swapins = -1, };
345
346 /* System sleep state */
347 boolean_t tasks_suspend_state;
348
349 __options_decl(send_exec_resource_is_fatal, bool, {
350 IS_NOT_FATAL = false,
351 IS_FATAL = true
352 });
353
354 __options_decl(send_exec_resource_is_diagnostics, bool, {
355 IS_NOT_DIAGNOSTICS = false,
356 IS_DIAGNOSTICS = true
357 });
358
359 __options_decl(send_exec_resource_is_warning, bool, {
360 IS_NOT_WARNING = false,
361 IS_WARNING = true
362 });
363
364 __options_decl(send_exec_resource_options_t, uint8_t, {
365 EXEC_RESOURCE_FATAL = 0x01,
366 EXEC_RESOURCE_DIAGNOSTIC = 0x02,
367 EXEC_RESOURCE_WARNING = 0x04,
368 });
369
370 /**
371 * Actions to take when a process has reached the memory limit or the diagnostics threshold limits
372 */
373 static inline void task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning);
374 #if DEBUG || DEVELOPMENT
375 static inline void task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size);
376 #endif
377 void init_task_ledgers(void);
378 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
379 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
380 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
381 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
382 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options);
383 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
384 #if CONFIG_PROC_RESOURCE_LIMITS
385 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
386 mach_port_name_t current_task_get_fatal_port_name(void);
387 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
388
389 kern_return_t task_suspend_internal_locked(task_t);
390 kern_return_t task_suspend_internal(task_t);
391 kern_return_t task_resume_internal_locked(task_t);
392 kern_return_t task_resume_internal(task_t);
393 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
394
395 extern kern_return_t iokit_task_terminate(task_t task, int phase);
396 extern void iokit_task_app_suspended_changed(task_t task);
397
398 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
399 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
400 extern kern_return_t thread_resume(thread_t thread);
401
402 extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
403
404 // Condition to include diag footprints
405 #define RESETTABLE_DIAG_FOOTPRINT_LIMITS ((DEBUG || DEVELOPMENT) && CONFIG_MEMORYSTATUS)
406
407 // Warn tasks when they hit 80% of their memory limit.
408 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
409
410 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
411 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
412
413 /*
414 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
415 *
416 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
417 * stacktraces, aka micro-stackshots)
418 */
419 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
420
421 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
422 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
423
424 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
425
426 TUNABLE(bool, disable_exc_resource, "disable_exc_resource", false); /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
427 TUNABLE(bool, disable_exc_resource_during_audio, "disable_exc_resource_during_audio", true); /* Global override to suppress EXC_RESOURCE while audio is active */
428
429 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
430 unsigned int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
431
432 /*
433 * Configure per-task memory limit.
434 * The boot-arg is interpreted as Megabytes,
435 * and takes precedence over the device tree.
436 * Setting the boot-arg to 0 disables task limits.
437 */
438 TUNABLE_DT_WRITEABLE(int, max_task_footprint_mb, "/defaults", "kern.max_task_pmem", "max_task_pmem", 0, TUNABLE_DT_NONE);
439
440 /* I/O Monitor Limits */
441 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
442 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
443
444 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
445 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
446
447 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
448 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
449 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
450 int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
451 static boolean_t global_update_logical_writes(int64_t, int64_t*);
452
453 #if DEBUG || DEVELOPMENT
454 static diagthreshold_check_return task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value);
455 #endif
456 #define TASK_MAX_THREAD_LIMIT 256
457
458 #if MACH_ASSERT
459 int pmap_ledgers_panic = 1;
460 int pmap_ledgers_panic_leeway = 3;
461 #endif /* MACH_ASSERT */
462
463 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
464
465 #if CONFIG_COREDUMP
466 int hwm_user_cores = 0; /* high watermark violations generate user core files */
467 #endif
468
469 #ifdef MACH_BSD
470 extern uint32_t proc_platform(const struct proc *);
471 extern uint32_t proc_sdk(struct proc *);
472 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
473 extern int proc_pid(struct proc *p);
474 extern int proc_selfpid(void);
475 extern struct proc *current_proc(void);
476 extern char *proc_name_address(struct proc *p);
477 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
478 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
479 extern void workq_proc_suspended(struct proc *p);
480 extern void workq_proc_resumed(struct proc *p);
481 extern struct proc *kernproc;
482
483 #if CONFIG_MEMORYSTATUS
484 extern void proc_memstat_skip(struct proc* p, boolean_t set);
485 extern void memorystatus_on_ledger_footprint_exceeded(int warning, bool memlimit_is_active, bool memlimit_is_fatal);
486 extern void memorystatus_log_exception(const int max_footprint_mb, bool memlimit_is_active, bool memlimit_is_fatal);
487 extern void memorystatus_log_diag_threshold_exception(const int diag_threshold_value);
488 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
489 extern uint64_t memorystatus_available_memory_internal(struct proc *p);
490
491 #if DEVELOPMENT || DEBUG
492 extern void memorystatus_abort_vm_map_fork(task_t);
493 #endif
494
495 #endif /* CONFIG_MEMORYSTATUS */
496
497 #endif /* MACH_BSD */
498
499 /* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
500 static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
501
502 /*
503 * Defaults for controllable EXC_GUARD behaviors
504 *
505 * Internal builds are fatal by default (except BRIDGE).
506 * Create an alternate set of defaults for special processes by name.
507 */
/* Pairs a process name with an overriding EXC_GUARD behavior bit-set. */
struct task_exc_guard_named_default {
	char *name;        /* process name to match */
	uint32_t behavior; /* TASK_EXC_GUARD_* behavior bits to apply */
};
/* Composite mach-port (MP) EXC_GUARD behaviors built from single-bit flags. */
#define _TASK_EXC_GUARD_MP_CORPSE (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
#define _TASK_EXC_GUARD_MP_ONCE (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
#define _TASK_EXC_GUARD_MP_FATAL (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)

/*
 * Composite VM EXC_GUARD behaviors.
 * NOTE(review): unlike the MP variant above, _TASK_EXC_GUARD_VM_CORPSE is
 * built from TASK_EXC_GUARD_VM_ONCE rather than TASK_EXC_GUARD_VM_CORPSE;
 * confirm this asymmetry is intentional.
 */
#define _TASK_EXC_GUARD_VM_CORPSE (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_VM_ONCE (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_VM_FATAL (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)

/* Combined MP+VM defaults used for task_exc_guard_default below. */
#define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
#define _TASK_EXC_GUARD_ALL_ONCE (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
#define _TASK_EXC_GUARD_ALL_FATAL (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
529
530 #if !defined(XNU_TARGET_OS_BRIDGE)
531
532 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
533 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
534 /*
535 * These "by-process-name" default overrides are intended to be a short-term fix to
536 * quickly get over races between changes introducing new EXC_GUARD raising behaviors
537 * in some process and a change in default behavior for same. We should ship with
538 * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
539 * exception behavior via task_set_exc_guard_behavior()).
540 *
541 * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
542 * task_exc_guard_default when transitioning this list between empty and
543 * non-empty.
544 */
545 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
546
547 #else /* !defined(XNU_TARGET_OS_BRIDGE) */
548
549 uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
550 uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
551 static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
552
553 #endif /* !defined(XNU_TARGET_OS_BRIDGE) */
554
555 /* Forwards */
556
557 static void task_hold_locked(task_t task);
558 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
559 static void task_release_locked(task_t task);
560 extern task_t proc_get_task_raw(void *proc);
561 extern void task_ref_hold_proc_task_struct(task_t task);
562 extern void task_release_proc_task_struct(task_t task);
563
564 static void task_synchronizer_destroy_all(task_t task);
565 static os_ref_count_t
566 task_add_turnstile_watchports_locked(
567 task_t task,
568 struct task_watchports *watchports,
569 struct task_watchport_elem **previous_elem_array,
570 ipc_port_t *portwatch_ports,
571 uint32_t portwatch_count);
572
573 static os_ref_count_t
574 task_remove_turnstile_watchports_locked(
575 task_t task,
576 struct task_watchports *watchports,
577 ipc_port_t *port_freelist);
578
579 static struct task_watchports *
580 task_watchports_alloc_init(
581 task_t task,
582 thread_t thread,
583 uint32_t count);
584
585 static void
586 task_watchports_deallocate(
587 struct task_watchports *watchports);
588
589 __attribute__((always_inline)) inline void
task_lock(task_t task)590 task_lock(task_t task)
591 {
592 lck_mtx_lock(&(task)->lock);
593 }
594
595 __attribute__((always_inline)) inline void
task_unlock(task_t task)596 task_unlock(task_t task)
597 {
598 lck_mtx_unlock(&(task)->lock);
599 }
600
/*
 * Set whether the task uses a 64-bit address space and/or 64-bit
 * register (data) state. When the data mode actually changes, every
 * thread in the task is switched to the matching machine address mode.
 */
void
task_set_64bit(
	task_t task,
	boolean_t is_64bit,
	boolean_t is_64bit_data)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
	thread_t thread;
#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */

	task_lock(task);

	/*
	 * Switching to/from 64-bit address spaces
	 */
	if (is_64bit) {
		if (!task_has_64Bit_addr(task)) {
			task_set_64Bit_addr(task);
		}
	} else {
		if (task_has_64Bit_addr(task)) {
			task_clear_64Bit_addr(task);
		}
	}

	/*
	 * Switching to/from 64-bit register state.
	 */
	if (is_64bit_data) {
		if (task_has_64Bit_data(task)) {
			/* already 64-bit data: per-thread switch not needed */
			goto out;
		}

		task_set_64Bit_data(task);
	} else {
		if (!task_has_64Bit_data(task)) {
			/* already 32-bit data: per-thread switch not needed */
			goto out;
		}

		task_clear_64Bit_data(task);
	}

	/* FIXME: On x86, the thread save state flavor can diverge from the
	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
	 * state dichotomy. Since we can be pre-empted in this interval,
	 * certain routines may observe the thread as being in an inconsistent
	 * state with respect to its task's 64-bitness.
	 */

#if defined(__x86_64__) || defined(__arm64__)
	/* Data mode changed: update each thread's machine address mode. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		machine_thread_switch_addrmode(thread);
		thread_mtx_unlock(thread);
	}
#endif /* defined(__x86_64__) || defined(__arm64__) */

out:
	task_unlock(task);
}
661
/* Return whether the task currently has a 64-bit address space. */
bool
task_get_64bit_addr(task_t task)
{
	return task_has_64Bit_addr(task);
}
667
/* Return whether the task currently uses 64-bit register (data) state. */
bool
task_get_64bit_data(task_t task)
{
	return task_has_64Bit_data(task);
}
673
674 void
task_set_platform_binary(task_t task,boolean_t is_platform)675 task_set_platform_binary(
676 task_t task,
677 boolean_t is_platform)
678 {
679 if (is_platform) {
680 task_ro_flags_set(task, TFRO_PLATFORM);
681 } else {
682 task_ro_flags_clear(task, TFRO_PLATFORM);
683 }
684 }
685
686 boolean_t
task_get_platform_binary(task_t task)687 task_get_platform_binary(task_t task)
688 {
689 return (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
690 }
691
692 boolean_t
task_is_a_corpse(task_t task)693 task_is_a_corpse(task_t task)
694 {
695 return (task_ro_flags_get(task) & TFRO_CORPSE) != 0;
696 }
697
698 void
task_set_corpse(task_t task)699 task_set_corpse(task_t task)
700 {
701 return task_ro_flags_set(task, TFRO_CORPSE);
702 }
703
/* Delegate to the IPC layer to mark the task's ports immovable/pinned. */
void
task_set_immovable_pinned(task_t task)
{
	ipc_task_set_immovable_pinned(task);
}
709
710 /*
711 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
712 * Returns "false" if flag is already set, and "true" in other cases.
713 */
714 bool
task_set_ca_client_wi(task_t task,boolean_t set_or_clear)715 task_set_ca_client_wi(
716 task_t task,
717 boolean_t set_or_clear)
718 {
719 bool ret = true;
720 task_lock(task);
721 if (set_or_clear) {
722 /* Tasks can have only one CA_CLIENT work interval */
723 if (task->t_flags & TF_CA_CLIENT_WI) {
724 ret = false;
725 } else {
726 task->t_flags |= TF_CA_CLIENT_WI;
727 }
728 } else {
729 task->t_flags &= ~TF_CA_CLIENT_WI;
730 }
731 task_unlock(task);
732 return ret;
733 }
734
735 /*
736 * task_set_dyld_info() is called at most three times.
737 * 1) at task struct creation to set addr/size to zero.
738 * 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
739 * 3) is from dyld itself to update location of all_image_info
740 * For security any calls after that are ignored. The TF_DYLD_ALL_IMAGE_SET bit is used to determine state.
741 */
/*
 * Record the location/size of dyld's all_image_info data for the task.
 * Rejects overflowing ranges and any update after the value has been
 * latched final (TF_DYLD_ALL_IMAGE_FINAL) — see the comment above for
 * the three allowed call sites.
 */
kern_return_t
task_set_dyld_info(
	task_t task,
	mach_vm_address_t addr,
	mach_vm_size_t size)
{
	mach_vm_address_t end;
	/* Reject a range whose end would wrap around the address space. */
	if (os_add_overflow(addr, size, &end)) {
		return KERN_FAILURE;
	}

	task_lock(task);
	/* don't accept updates if all_image_info_addr is final */
	if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == 0) {
		bool inputNonZero = ((addr != 0) || (size != 0));
		bool currentNonZero = ((task->all_image_info_addr != 0) || (task->all_image_info_size != 0));
		task->all_image_info_addr = addr;
		task->all_image_info_size = size;
		/* can only change from a non-zero value to another non-zero once */
		if (inputNonZero && currentNonZero) {
			task->t_flags |= TF_DYLD_ALL_IMAGE_FINAL;
		}
		task_unlock(task);
		return KERN_SUCCESS;
	} else {
		task_unlock(task);
		return KERN_FAILURE;
	}
}
771
/* Return the task's donates_own_pages flag. */
bool
task_donates_own_pages(
	task_t task)
{
	return task->donates_own_pages;
}
778
/* Record the VM address of the task's Mach-O header under the task lock. */
void
task_set_mach_header_address(
	task_t task,
	mach_vm_address_t addr)
{
	task_lock(task);
	task->mach_header_vm_address = addr;
	task_unlock(task);
}
788
789 void
task_bank_reset(__unused task_t task)790 task_bank_reset(__unused task_t task)
791 {
792 if (task->bank_context != NULL) {
793 bank_task_destroy(task);
794 }
795 }
796
797 /*
798 * NOTE: This should only be called when the P_LINTRANSIT
799 * flag is set (the proc_trans lock is held) on the
800 * proc associated with the task.
801 */
void
task_bank_init(__unused task_t task)
{
	/* A task must not already own a bank context when it is initialized. */
	if (task->bank_context != NULL) {
		panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
	}
	bank_task_initialize(task);
}
810
/* Mark that this task has performed an exec (TPF_DID_EXEC). */
void
task_set_did_exec_flag(task_t task)
{
	task->t_procflags |= TPF_DID_EXEC;
}
816
/* Clear the "task is an exec copy" marker (TPF_EXEC_COPY). */
void
task_clear_exec_copy_flag(task_t task)
{
	task->t_procflags &= ~TPF_EXEC_COPY;
}
822
/* The return-wait event is keyed on the address of returnwait_inheritor. */
event_t
task_get_return_wait_event(task_t task)
{
	return (event_t)&task->returnwait_inheritor;
}
828
/*
 * Release threads blocked in task_wait_to_return().
 * TCRW_CLEAR_INITIAL_WAIT does a plain wakeup on the return-wait event;
 * TCRW_CLEAR_FINAL_WAIT clears TRW_LRETURNWAIT under the itk_space write
 * lock and, if a waiter is present, wakes it through the turnstile so the
 * inherited priority is handed back.
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		/* itk_space write lock serializes with task_wait_to_return() */
		is_write_lock(task->itk_space);

		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (flags & TCRW_CLEAR_EXEC_COMPLETE) {
			task->t_returnwaitflags &= ~TRW_LEXEC_COMPLETE;
		}

		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
			    TURNSTILE_ULOCK);

			/* Wake all waiters and transfer the turnstile inheritor. */
			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
863
/*
 * Block the current thread until TRW_LRETURNWAIT is cleared by
 * task_clear_return_wait(). While waiting, a turnstile donates the
 * waiter's priority to task->returnwait_inheritor. On release, performs
 * MACF credential caching / exec-complete notification (if configured)
 * and returns to userspace via thread_bootstrap_return() — never returns
 * to the caller.
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();
	uint8_t returnwaitflags;

	/* itk_space write lock serializes with task_clear_return_wait() */
	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare_hash((uintptr_t) task_get_return_wait_event(task),
		    TURNSTILE_ULOCK);

		do {
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			/* Donate priority to the inheritor thread while blocked. */
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			/* Re-take the lock to re-check the flag (guards spurious wakeups). */
			is_write_lock(task->itk_space);
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete_hash((uintptr_t) task_get_return_wait_event(task), TURNSTILE_ULOCK);
	}

	/* Snapshot flags before dropping the lock; needed for MACF below. */
	returnwaitflags = task->t_returnwaitflags;
	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process
	 * to execute any code, make sure its credentials are cached,
	 * and notify any interested parties.
	 */
	extern void mach_kauth_cred_thread_update(void);

	mach_kauth_cred_thread_update();
	if (returnwaitflags & TRW_LEXEC_COMPLETE) {
		mac_proc_notify_exec_complete(current_proc());
	}
#endif

	thread_bootstrap_return();
}
918
919 boolean_t
task_is_exec_copy(task_t task)920 task_is_exec_copy(task_t task)
921 {
922 return task_is_exec_copy_internal(task);
923 }
924
925 boolean_t
task_did_exec(task_t task)926 task_did_exec(task_t task)
927 {
928 return task_did_exec_internal(task);
929 }
930
931 boolean_t
task_is_active(task_t task)932 task_is_active(task_t task)
933 {
934 return task->active;
935 }
936
937 boolean_t
task_is_halting(task_t task)938 task_is_halting(task_t task)
939 {
940 return task->halting;
941 }
942
/*
 * task_init:
 * One-time, boot-time initialization of the task subsystem:
 *  - configure the per-task physical footprint limit / warning level
 *    (when CONFIG_MEMORYSTATUS and max_task_footprint_mb boot-arg are set);
 *  - read the monitor-related boot-args (wakeups, I/O, telemetry);
 *  - set up task ledgers (directly, or via coalition_init() when
 *    CONFIG_COALITIONS is in play);
 *  - create the proc_task zone and the kernel_task itself.
 */
void
task_init(void)
{
	if (max_task_footprint_mb != 0) {
#if CONFIG_MEMORYSTATUS
		/* Clamp the boot-arg to a 50 MB floor */
		if (max_task_footprint_mb < 50) {
			printf("Warning: max_task_pmem %d below minimum.\n",
			    max_task_footprint_mb);
			max_task_footprint_mb = 50;
		}
		printf("Limiting task physical memory footprint to %d MB\n",
		    max_task_footprint_mb);

		max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes

		/*
		 * Configure the per-task memory limit warning level.
		 * This is computed as a percentage.
		 */
		max_task_footprint_warning_level = 0;

		if (max_mem < 0x40000000) {
			/*
			 * On devices with < 1GB of memory:
			 * -- set warnings to 50MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 50) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
			}
		} else {
			/*
			 * On devices with >= 1GB of memory:
			 * -- set warnings to 100MB below the per-task limit.
			 */
			if (max_task_footprint_mb > 100) {
				max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
			}
		}

		/*
		 * Never allow warning level to land below the default.
		 */
		if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
			max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
		}

		printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);

#else
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
#endif /* CONFIG_MEMORYSTATUS */
	}

#if DEVELOPMENT || DEBUG
	/* Allow overriding the default exception-guard behavior on non-release kernels */
	PE_parse_boot_argn("task_exc_guard_default",
	    &task_exc_guard_default,
	    sizeof(task_exc_guard_default));
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_COREDUMP
	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
	    sizeof(hwm_user_cores))) {
		hwm_user_cores = 0;
	}
#endif

	proc_init_cpumon_params();

	/* Wakeups / I/O monitor tunables, each falling back to its default */
	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
	    sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
	}

	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
		task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
	}

	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
		task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
	}

	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
		io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
	}

	/*
	 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
	 * sets up the ledgers for the default coalition. If we don't have coalitions,
	 * then we have to call it now.
	 */
#if CONFIG_COALITIONS
	assert(task_ledger_template);
#else /* CONFIG_COALITIONS */
	init_task_ledgers();
#endif /* CONFIG_COALITIONS */

	task_ref_init();
	task_zone_init();

#ifdef __LP64__
	boolean_t is_64bit = TRUE;
#else
	boolean_t is_64bit = FALSE;
#endif

	/* The kernel proc/task pair comes out of the combined proc_task zone */
	kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK | Z_ZERO);
	kernel_task = proc_get_task_raw(kernproc);

	/*
	 * Create the kernel task as the first task.
	 */
	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, is_64bit,
	    is_64bit, TF_NONE, TF_NONE, TPF_NONE, TWF_NONE, kernel_task) != KERN_SUCCESS) {
		panic("task_init");
	}

	ipc_task_enable(kernel_task);

#if defined(HAS_APPLE_PAC)
	kernel_task->rop_pid = ml_default_rop_pid();
	kernel_task->jop_pid = ml_default_jop_pid();
	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
	ml_task_set_disable_user_jop(kernel_task, FALSE);
#endif

	/* Swap the freshly-created map for the real kernel_map */
	vm_map_deallocate(kernel_task->map);
	kernel_task->map = kernel_map;
}
1080
/*
 * task_zone_init:
 * Compute the sizes of the combined proc+task allocation (each structure
 * rounded up to the other's alignment so both are properly aligned when
 * laid out back-to-back) and create the proc_task zone that backs it.
 */
static inline void
task_zone_init(void)
{
	proc_struct_size = roundup(proc_struct_size, task_alignment);
	task_struct_size = roundup(sizeof(struct task), proc_alignment);
	proc_and_task_size = proc_struct_size + task_struct_size;

	/* Zero-on-free; sequestering keeps addresses stable for proc_rele() */
	proc_task_zone = zone_create_ext("proc_task", proc_and_task_size,
	    ZC_ZFREE_CLEARMEM | ZC_SEQUESTER, ZONE_ID_PROC_TASK, NULL); /* sequester is needed for proc_rele() */
}
1091
1092 /*
1093 * Task ledgers
1094 * ------------
1095 *
1096 * phys_footprint
1097 * Physical footprint: This is the sum of:
1098 * + (internal - alternate_accounting)
1099 * + (internal_compressed - alternate_accounting_compressed)
1100 * + iokit_mapped
1101 * + purgeable_nonvolatile
1102 * + purgeable_nonvolatile_compressed
1103 * + page_table
1104 *
1105 * internal
1106 * The task's anonymous memory, which on iOS is always resident.
1107 *
1108 * internal_compressed
1109 * Amount of this task's internal memory which is held by the compressor.
1110 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1111 * and could be either decompressed back into memory, or paged out to storage, depending
1112 * on our implementation.
1113 *
1114 * iokit_mapped
 *	IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 *              clean/dirty or internal/external state.
1117 *
1118 * alternate_accounting
1119 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1120 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1121 * double counting.
1122 *
1123 * pages_grabbed
1124 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1125 * which track UPL, IOPL and Kernel page grabs.
1126 */
/*
 * init_task_ledgers:
 * Build the per-task ledger template: register every ledger entry
 * (memory accounting, wakeups, SFI wait times, I/O, energy, ...),
 * verify all registrations succeeded, then configure tracking modes,
 * diagnostic panics, threshold callbacks, and publish the template in
 * task_ledger_template.  Must run once, before the kernel task exists.
 */
void
init_task_ledgers(void)
{
	ledger_template_t t;

	/* One-shot: the template must not exist yet, and no task may yet use it */
	assert(task_ledger_template == NULL);
	assert(kernel_task == TASK_NULL);

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_ledgers_panic",
	    &pmap_ledgers_panic,
	    sizeof(pmap_ledgers_panic));
	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
	    &pmap_ledgers_panic_leeway,
	    sizeof(pmap_ledgers_panic_leeway));
#endif /* MACH_ASSERT */

	if ((t = ledger_template_create("Per-task ledger")) == NULL) {
		panic("couldn't create task ledger template");
	}

	/*
	 * Register the entries; each call returns the entry's index (negative
	 * on failure — checked in one batch below).
	 */
	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
	    "physmem", "bytes");
	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
	    "bytes");
	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
	    "bytes");
	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
	    "bytes");
	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
	    "bytes");
	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(t, "iokit_mapped", "mappings",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(t, "alternate_accounting", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(t, "alternate_accounting_compressed", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.page_table = ledger_entry_add_with_flags(t, "page_table", "physmem",
	    "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
	    "bytes");
	task_ledgers.reusable = ledger_entry_add(t, "reusable", "physmem", "bytes");
	task_ledgers.external = ledger_entry_add(t, "external", "physmem", "bytes");
	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(t, "purgeable_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(t, "purgeable_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(t, "purgeable_volatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(t, "purgeable_nonvolatile_compress", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#if DEBUG || DEVELOPMENT
	/* Page-grab counters are diagnostic-only entries */
	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
#endif
	/* Tagged / network / media / graphics / neural memory accounting */
	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(t, "tagged_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(t, "tagged_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(t, "tagged_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(t, "tagged_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile = ledger_entry_add_with_flags(t, "network_volatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(t, "network_nonvolatile", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(t, "network_volatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(t, "network_nonvolatile_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(t, "media_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint = ledger_entry_add_with_flags(t, "media_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(t, "media_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(t, "media_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(t, "graphics_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(t, "graphics_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(t, "graphics_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(t, "graphics_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(t, "neural_nofootprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint = ledger_entry_add_with_flags(t, "neural_footprint", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(t, "neural_nofootprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(t, "neural_footprint_compressed", "physmem", "bytes", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

#if CONFIG_FREEZE
	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
#endif /* CONFIG_FREEZE */

	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");

#if CONFIG_SCHED_SFI
	sfi_class_id_t class_id, ledger_alias;
	/* Start with all SFI wait-time entries unregistered (-1) */
	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		task_ledgers.sfi_wait_times[class_id] = -1;
	}

	/* don't account for UNSPECIFIED */
	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
			/* Check to see if alias has been registered yet */
			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
			} else {
				/* Otherwise, initialize it first */
				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
			}
		} else {
			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
		}

		if (task_ledgers.sfi_wait_times[class_id] < 0) {
			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
		}
	}

	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
#endif /* CONFIG_SCHED_SFI */

	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
	task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
#if CONFIG_PHYS_WRITE_ACCT
	task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
#endif /* CONFIG_PHYS_WRITE_ACCT */
	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");

#if CONFIG_MEMORYSTATUS
	task_ledgers.memorystatus_dirty_time = ledger_entry_add(t, "memorystatus_dirty_time", "physmem", "ns");
#endif /* CONFIG_MEMORYSTATUS */

	task_ledgers.swapins = ledger_entry_add_with_flags(t, "swapins", "physmem", "bytes",
	    LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);

	/*
	 * Batch-verify that every registration above succeeded (each index
	 * is negative on failure).
	 */
	if ((task_ledgers.cpu_time < 0) ||
	    (task_ledgers.tkm_private < 0) ||
	    (task_ledgers.tkm_shared < 0) ||
	    (task_ledgers.phys_mem < 0) ||
	    (task_ledgers.wired_mem < 0) ||
	    (task_ledgers.internal < 0) ||
	    (task_ledgers.external < 0) ||
	    (task_ledgers.reusable < 0) ||
	    (task_ledgers.iokit_mapped < 0) ||
	    (task_ledgers.alternate_accounting < 0) ||
	    (task_ledgers.alternate_accounting_compressed < 0) ||
	    (task_ledgers.page_table < 0) ||
	    (task_ledgers.phys_footprint < 0) ||
	    (task_ledgers.internal_compressed < 0) ||
	    (task_ledgers.purgeable_volatile < 0) ||
	    (task_ledgers.purgeable_nonvolatile < 0) ||
	    (task_ledgers.purgeable_volatile_compressed < 0) ||
	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
	    (task_ledgers.tagged_nofootprint < 0) ||
	    (task_ledgers.tagged_footprint < 0) ||
	    (task_ledgers.tagged_nofootprint_compressed < 0) ||
	    (task_ledgers.tagged_footprint_compressed < 0) ||
#if CONFIG_FREEZE
	    (task_ledgers.frozen_to_swap < 0) ||
#endif /* CONFIG_FREEZE */
	    (task_ledgers.network_volatile < 0) ||
	    (task_ledgers.network_nonvolatile < 0) ||
	    (task_ledgers.network_volatile_compressed < 0) ||
	    (task_ledgers.network_nonvolatile_compressed < 0) ||
	    (task_ledgers.media_nofootprint < 0) ||
	    (task_ledgers.media_footprint < 0) ||
	    (task_ledgers.media_nofootprint_compressed < 0) ||
	    (task_ledgers.media_footprint_compressed < 0) ||
	    (task_ledgers.graphics_nofootprint < 0) ||
	    (task_ledgers.graphics_footprint < 0) ||
	    (task_ledgers.graphics_nofootprint_compressed < 0) ||
	    (task_ledgers.graphics_footprint_compressed < 0) ||
	    (task_ledgers.neural_nofootprint < 0) ||
	    (task_ledgers.neural_footprint < 0) ||
	    (task_ledgers.neural_nofootprint_compressed < 0) ||
	    (task_ledgers.neural_footprint_compressed < 0) ||
	    (task_ledgers.platform_idle_wakeups < 0) ||
	    (task_ledgers.interrupt_wakeups < 0) ||
	    (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
	    (task_ledgers.physical_writes < 0) ||
	    (task_ledgers.logical_writes < 0) ||
	    (task_ledgers.logical_writes_to_external < 0) ||
#if CONFIG_PHYS_WRITE_ACCT
	    (task_ledgers.fs_metadata_writes < 0) ||
#endif /* CONFIG_PHYS_WRITE_ACCT */
#if CONFIG_MEMORYSTATUS
	    (task_ledgers.memorystatus_dirty_time < 0) ||
#endif /* CONFIG_MEMORYSTATUS */
	    (task_ledgers.energy_billed_to_me < 0) ||
	    (task_ledgers.energy_billed_to_others < 0) ||
	    (task_ledgers.swapins < 0)
	    ) {
		panic("couldn't create entries for task ledger template");
	}

	/* These entries only ever count credits */
	ledger_track_credit_only(t, task_ledgers.phys_footprint);
	ledger_track_credit_only(t, task_ledgers.internal);
	ledger_track_credit_only(t, task_ledgers.external);
	ledger_track_credit_only(t, task_ledgers.reusable);

	/* Track high-water marks for these entries */
	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
	ledger_track_maximum(t, task_ledgers.phys_mem, 60);
	ledger_track_maximum(t, task_ledgers.internal, 60);
	ledger_track_maximum(t, task_ledgers.internal_compressed, 60);
	ledger_track_maximum(t, task_ledgers.reusable, 60);
	ledger_track_maximum(t, task_ledgers.external, 60);
#if MACH_ASSERT
	/* Optional debug aid: panic as soon as any of these goes negative */
	if (pmap_ledgers_panic) {
		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
		ledger_panic_on_negative(t, task_ledgers.page_table);
		ledger_panic_on_negative(t, task_ledgers.internal);
		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
		ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
#if CONFIG_PHYS_WRITE_ACCT
		ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
#endif /* CONFIG_PHYS_WRITE_ACCT */

		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
		ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_volatile);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
		ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.media_footprint);
		ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
		ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint);
		ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
		ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
	}
#endif /* MACH_ASSERT */

#if CONFIG_MEMORYSTATUS
	/* Fire the memorystatus callback when footprint crosses its threshold */
	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
#endif /* CONFIG_MEMORYSTATUS */

	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
	    task_wakeups_rate_exceeded, NULL, NULL);
	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);

#if !XNU_MONITOR
	ledger_template_complete(t);
#else /* !XNU_MONITOR */
	ledger_template_complete_secure_alloc(t);
#endif /* XNU_MONITOR */
	/* Publish: task_create_internal() instantiates ledgers from this template */
	task_ledger_template = t;
}
1385
1386 /* Create a task, but leave the task ports disabled */
1387 kern_return_t
task_create_internal(task_t parent_task,proc_ro_t proc_ro,coalition_t * parent_coalitions __unused,boolean_t inherit_memory,boolean_t is_64bit,boolean_t is_64bit_data,uint32_t t_flags,uint32_t t_flags_ro,uint32_t t_procflags,uint8_t t_returnwaitflags,task_t child_task)1388 task_create_internal(
1389 task_t parent_task, /* Null-able */
1390 proc_ro_t proc_ro,
1391 coalition_t *parent_coalitions __unused,
1392 boolean_t inherit_memory,
1393 boolean_t is_64bit,
1394 boolean_t is_64bit_data,
1395 uint32_t t_flags,
1396 uint32_t t_flags_ro,
1397 uint32_t t_procflags,
1398 uint8_t t_returnwaitflags,
1399 task_t child_task)
1400 {
1401 task_t new_task;
1402 vm_shared_region_t shared_region;
1403 ledger_t ledger = NULL;
1404 struct task_ro_data task_ro_data = {};
1405 uint32_t parent_t_flags_ro = 0;
1406
1407 new_task = child_task;
1408
1409 if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1410 return KERN_RESOURCE_SHORTAGE;
1411 }
1412
1413 /* allocate with active entries */
1414 assert(task_ledger_template != NULL);
1415 ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1416 if (ledger == NULL) {
1417 task_ref_count_fini(new_task);
1418 return KERN_RESOURCE_SHORTAGE;
1419 }
1420
1421 counter_alloc(&(new_task->faults));
1422
1423 #if defined(HAS_APPLE_PAC)
1424 ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1425 ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
1426 ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1427 #endif
1428
1429
1430 new_task->ledger = ledger;
1431
1432 /* if inherit_memory is true, parent_task MUST not be NULL */
1433 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1434 #if CONFIG_DEFERRED_RECLAIM
1435 if (parent_task->deferred_reclamation_metadata) {
1436 /*
1437 * Prevent concurrent reclaims while we're forking the parent_task's map,
1438 * so that the child's map is in sync with the forked reclamation
1439 * metadata.
1440 */
1441 vm_deferred_reclamation_buffer_lock(parent_task->deferred_reclamation_metadata);
1442 }
1443 #endif /* CONFIG_DEFERRED_RECLAIM */
1444 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1445 #if CONFIG_DEFERRED_RECLAIM
1446 if (parent_task->deferred_reclamation_metadata) {
1447 new_task->deferred_reclamation_metadata =
1448 vm_deferred_reclamation_buffer_fork(new_task, parent_task->deferred_reclamation_metadata);
1449 }
1450 #endif /* CONFIG_DEFERRED_RECLAIM */
1451 } else {
1452 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1453 pmap_t pmap = pmap_create_options(ledger, 0, pmap_flags);
1454 vm_map_t new_map;
1455
1456 if (pmap == NULL) {
1457 counter_free(&new_task->faults);
1458 ledger_dereference(ledger);
1459 task_ref_count_fini(new_task);
1460 return KERN_RESOURCE_SHORTAGE;
1461 }
1462 new_map = vm_map_create_options(pmap,
1463 (vm_map_offset_t)(VM_MIN_ADDRESS),
1464 (vm_map_offset_t)(VM_MAX_ADDRESS),
1465 VM_MAP_CREATE_PAGEABLE);
1466 if (parent_task) {
1467 vm_map_inherit_limits(new_map, parent_task->map);
1468 }
1469 new_task->map = new_map;
1470 }
1471
1472 if (new_task->map == NULL) {
1473 counter_free(&new_task->faults);
1474 ledger_dereference(ledger);
1475 task_ref_count_fini(new_task);
1476 return KERN_RESOURCE_SHORTAGE;
1477 }
1478
1479 #if defined(CONFIG_SCHED_MULTIQ)
1480 new_task->sched_group = sched_group_create();
1481 #endif
1482
1483 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1484 queue_init(&new_task->threads);
1485 new_task->suspend_count = 0;
1486 new_task->thread_count = 0;
1487 new_task->active_thread_count = 0;
1488 new_task->user_stop_count = 0;
1489 new_task->legacy_stop_count = 0;
1490 new_task->active = TRUE;
1491 new_task->halting = FALSE;
1492 new_task->priv_flags = 0;
1493 new_task->t_flags = t_flags;
1494 task_ro_data.t_flags_ro = t_flags_ro;
1495 new_task->t_procflags = t_procflags;
1496 new_task->t_returnwaitflags = t_returnwaitflags;
1497 new_task->returnwait_inheritor = current_thread();
1498 new_task->importance = 0;
1499 new_task->crashed_thread_id = 0;
1500 new_task->watchports = NULL;
1501 new_task->t_rr_ranges = NULL;
1502
1503 new_task->bank_context = NULL;
1504
1505 if (parent_task) {
1506 parent_t_flags_ro = task_ro_flags_get(parent_task);
1507 }
1508
1509 #if __has_feature(ptrauth_calls)
1510 /* Inherit the pac exception flags from parent if in fork */
1511 if (parent_task && inherit_memory) {
1512 task_ro_data.t_flags_ro |= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE |
1513 TFRO_PAC_EXC_FATAL));
1514 }
1515 #endif
1516
1517 #ifdef MACH_BSD
1518 new_task->corpse_info = NULL;
1519 #endif /* MACH_BSD */
1520
1521 /* kern_task not created by this function has unique id 0, start with 1 here. */
1522 task_set_uniqueid(new_task);
1523
1524 #if CONFIG_MACF
1525 set_task_crash_label(new_task, NULL);
1526
1527 task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1528 task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1529 #endif
1530
1531 #if CONFIG_MEMORYSTATUS
1532 if (max_task_footprint != 0) {
1533 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1534 }
1535 #endif /* CONFIG_MEMORYSTATUS */
1536
1537 if (task_wakeups_monitor_rate != 0) {
1538 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1539 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1540 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1541 }
1542
1543 #if CONFIG_IO_ACCOUNTING
1544 uint32_t flags = IOMON_ENABLE;
1545 task_io_monitor_ctl(new_task, &flags);
1546 #endif /* CONFIG_IO_ACCOUNTING */
1547
1548 machine_task_init(new_task, parent_task, inherit_memory);
1549
1550 new_task->task_debug = NULL;
1551
1552 #if DEVELOPMENT || DEBUG
1553 new_task->task_unnested = FALSE;
1554 new_task->task_disconnected_count = 0;
1555 #endif
1556 queue_init(&new_task->semaphore_list);
1557 new_task->semaphores_owned = 0;
1558
1559 new_task->vtimers = 0;
1560
1561 new_task->shared_region = NULL;
1562
1563 new_task->affinity_space = NULL;
1564
1565 new_task->t_kpc = 0;
1566
1567 new_task->pidsuspended = FALSE;
1568 new_task->frozen = FALSE;
1569 new_task->changing_freeze_state = FALSE;
1570 new_task->rusage_cpu_flags = 0;
1571 new_task->rusage_cpu_percentage = 0;
1572 new_task->rusage_cpu_interval = 0;
1573 new_task->rusage_cpu_deadline = 0;
1574 new_task->rusage_cpu_callt = NULL;
1575 #if MACH_ASSERT
1576 new_task->suspends_outstanding = 0;
1577 #endif
1578 recount_task_init(&new_task->tk_recount);
1579
1580 #if HYPERVISOR
1581 new_task->hv_task_target = NULL;
1582 #endif /* HYPERVISOR */
1583
1584 #if CONFIG_TASKWATCH
1585 queue_init(&new_task->task_watchers);
1586 new_task->num_taskwatchers = 0;
1587 new_task->watchapplying = 0;
1588 #endif /* CONFIG_TASKWATCH */
1589
1590 new_task->mem_notify_reserved = 0;
1591 new_task->memlimit_attrs_reserved = 0;
1592
1593 new_task->requested_policy = default_task_requested_policy;
1594 new_task->effective_policy = default_task_effective_policy;
1595
1596 new_task->task_shared_region_slide = -1;
1597
1598 if (parent_task != NULL) {
1599 task_ro_data.task_tokens.sec_token = *task_get_sec_token(parent_task);
1600 task_ro_data.task_tokens.audit_token = *task_get_audit_token(parent_task);
1601
1602 /* only inherit the option bits, no effect until task_set_immovable_pinned() */
1603 task_ro_data.task_control_port_options = task_get_control_port_options(parent_task);
1604
1605 task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_FILTER_MSG;
1606 #if CONFIG_MACF
1607 if (!(t_flags & TF_CORPSE_FORK)) {
1608 task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(parent_task);
1609 task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(parent_task);
1610 }
1611 #endif
1612 } else {
1613 task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1614 task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1615
1616 task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1617 }
1618
1619 /* must set before task_importance_init_from_parent: */
1620 if (proc_ro != NULL) {
1621 new_task->bsd_info_ro = proc_ro_ref_task(proc_ro, new_task, &task_ro_data);
1622 } else {
1623 new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, new_task, &task_ro_data);
1624 }
1625
1626 ipc_task_init(new_task, parent_task);
1627
1628 task_importance_init_from_parent(new_task, parent_task);
1629
1630 new_task->corpse_vmobject_list = NULL;
1631
1632 if (parent_task != TASK_NULL) {
1633 /* inherit the parent's shared region */
1634 shared_region = vm_shared_region_get(parent_task);
1635 if (shared_region != NULL) {
1636 vm_shared_region_set(new_task, shared_region);
1637 }
1638
1639 #if __has_feature(ptrauth_calls)
1640 /* use parent's shared_region_id */
1641 char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1642 if (shared_region_id != NULL) {
1643 shared_region_key_alloc(shared_region_id, FALSE, 0); /* get a reference */
1644 }
1645 task_set_shared_region_id(new_task, shared_region_id);
1646 #endif /* __has_feature(ptrauth_calls) */
1647
1648 if (task_has_64Bit_addr(parent_task)) {
1649 task_set_64Bit_addr(new_task);
1650 }
1651
1652 if (task_has_64Bit_data(parent_task)) {
1653 task_set_64Bit_data(new_task);
1654 }
1655
1656 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1657 new_task->all_image_info_size = parent_task->all_image_info_size;
1658 new_task->mach_header_vm_address = 0;
1659
1660 if (inherit_memory && parent_task->affinity_space) {
1661 task_affinity_create(parent_task, new_task);
1662 }
1663
1664 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1665
1666 new_task->task_exc_guard = parent_task->task_exc_guard;
1667 if (parent_task->t_flags & TF_NO_SMT) {
1668 new_task->t_flags |= TF_NO_SMT;
1669 }
1670
1671 if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1672 new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1673 }
1674
1675 if (parent_task->t_flags & TF_TECS) {
1676 new_task->t_flags |= TF_TECS;
1677 }
1678
1679 #if defined(__x86_64__)
1680 if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1681 new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1682 }
1683 #endif
1684 new_task->priority = BASEPRI_DEFAULT;
1685 new_task->max_priority = MAXPRI_USER;
1686
1687 task_policy_create(new_task, parent_task);
1688 } else {
1689 #ifdef __LP64__
1690 if (is_64bit) {
1691 task_set_64Bit_addr(new_task);
1692 }
1693 #endif
1694
1695 if (is_64bit_data) {
1696 task_set_64Bit_data(new_task);
1697 }
1698
1699 new_task->all_image_info_addr = (mach_vm_address_t)0;
1700 new_task->all_image_info_size = (mach_vm_size_t)0;
1701
1702 new_task->pset_hint = PROCESSOR_SET_NULL;
1703
1704 new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1705
1706 if (new_task == kernel_task) {
1707 new_task->priority = BASEPRI_KERNEL;
1708 new_task->max_priority = MAXPRI_KERNEL;
1709 } else {
1710 new_task->priority = BASEPRI_DEFAULT;
1711 new_task->max_priority = MAXPRI_USER;
1712 }
1713 }
1714
1715 bzero(new_task->coalition, sizeof(new_task->coalition));
1716 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1717 queue_chain_init(new_task->task_coalition[i]);
1718 }
1719
1720 /* Allocate I/O Statistics */
1721 new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1722 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1723
1724 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1725 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1726
1727 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1728
1729 counter_alloc(&(new_task->pageins));
1730 counter_alloc(&(new_task->cow_faults));
1731 counter_alloc(&(new_task->messages_sent));
1732 counter_alloc(&(new_task->messages_received));
1733
	/* Copy resource accounting info from parent for corpse-forked task. */
1735 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1736 task_rollup_accounting_info(new_task, parent_task);
1737 task_store_owned_vmobject_info(new_task, parent_task);
1738 } else {
1739 /* Initialize to zero for standard fork/spawn case */
1740 new_task->total_runnable_time = 0;
1741 new_task->syscalls_mach = 0;
1742 new_task->syscalls_unix = 0;
1743 new_task->c_switch = 0;
1744 new_task->p_switch = 0;
1745 new_task->ps_switch = 0;
1746 new_task->decompressions = 0;
1747 new_task->low_mem_notified_warn = 0;
1748 new_task->low_mem_notified_critical = 0;
1749 new_task->purged_memory_warn = 0;
1750 new_task->purged_memory_critical = 0;
1751 new_task->low_mem_privileged_listener = 0;
1752 new_task->memlimit_is_active = 0;
1753 new_task->memlimit_is_fatal = 0;
1754 new_task->memlimit_active_exc_resource = 0;
1755 new_task->memlimit_inactive_exc_resource = 0;
1756 new_task->task_timer_wakeups_bin_1 = 0;
1757 new_task->task_timer_wakeups_bin_2 = 0;
1758 new_task->task_gpu_ns = 0;
1759 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1760 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1761 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1762 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1763 new_task->task_writes_counters_external.task_immediate_writes = 0;
1764 new_task->task_writes_counters_external.task_deferred_writes = 0;
1765 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1766 new_task->task_writes_counters_external.task_metadata_writes = 0;
1767 #if CONFIG_PHYS_WRITE_ACCT
1768 new_task->task_fs_metadata_writes = 0;
1769 #endif /* CONFIG_PHYS_WRITE_ACCT */
1770 }
1771
1772
1773 new_task->donates_own_pages = FALSE;
1774 #if CONFIG_COALITIONS
1775 if (!(t_flags & TF_CORPSE_FORK)) {
1776 /* TODO: there is no graceful failure path here... */
1777 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1778 coalitions_adopt_task(parent_coalitions, new_task);
1779 if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1780 new_task->donates_own_pages = coalition_is_swappable(parent_coalitions[COALITION_TYPE_JETSAM]);
1781 }
1782 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1783 /*
1784 * all tasks at least have a resource coalition, so
1785 * if the parent has one then inherit all coalitions
1786 * the parent is a part of
1787 */
1788 coalitions_adopt_task(parent_task->coalition, new_task);
1789 if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1790 new_task->donates_own_pages = coalition_is_swappable(parent_task->coalition[COALITION_TYPE_JETSAM]);
1791 }
1792 } else {
1793 /* TODO: assert that new_task will be PID 1 (launchd) */
1794 coalitions_adopt_init_task(new_task);
1795 }
1796 /*
1797 * on exec, we need to transfer the coalition roles from the
1798 * parent task to the exec copy task.
1799 */
1800 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1801 int coal_roles[COALITION_NUM_TYPES];
1802 task_coalition_roles(parent_task, coal_roles);
1803 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1804 }
1805 } else {
1806 coalitions_adopt_corpse_task(new_task);
1807 }
1808
1809 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1810 panic("created task is not a member of a resource coalition");
1811 }
1812 task_set_coalition_member(new_task);
1813 #endif /* CONFIG_COALITIONS */
1814
1815 new_task->dispatchqueue_offset = 0;
1816 if (parent_task != NULL) {
1817 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1818 }
1819
1820 new_task->task_can_transfer_memory_ownership = FALSE;
1821 new_task->task_volatile_objects = 0;
1822 new_task->task_nonvolatile_objects = 0;
1823 new_task->task_objects_disowning = FALSE;
1824 new_task->task_objects_disowned = FALSE;
1825 new_task->task_owned_objects = 0;
1826 queue_init(&new_task->task_objq);
1827
1828 #if CONFIG_FREEZE
1829 queue_init(&new_task->task_frozen_cseg_q);
1830 #endif /* CONFIG_FREEZE */
1831
1832 task_objq_lock_init(new_task);
1833
1834 #if __arm64__
1835 new_task->task_legacy_footprint = FALSE;
1836 new_task->task_extra_footprint_limit = FALSE;
1837 new_task->task_ios13extended_footprint_limit = FALSE;
1838 #endif /* __arm64__ */
1839 new_task->task_region_footprint = FALSE;
1840 new_task->task_has_crossed_thread_limit = FALSE;
1841 new_task->task_thread_limit = 0;
1842 #if CONFIG_SECLUDED_MEMORY
1843 new_task->task_can_use_secluded_mem = FALSE;
1844 new_task->task_could_use_secluded_mem = FALSE;
1845 new_task->task_could_also_use_secluded_mem = FALSE;
1846 new_task->task_suppressed_secluded = FALSE;
1847 #endif /* CONFIG_SECLUDED_MEMORY */
1848
1849 /*
1850 * t_flags is set up above. But since we don't
1851 * support darkwake mode being set that way
1852 * currently, we clear it out here explicitly.
1853 */
1854 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1855
1856 queue_init(&new_task->io_user_clients);
1857 new_task->loadTag = 0;
1858
1859 lck_mtx_lock(&tasks_threads_lock);
1860 queue_enter(&tasks, new_task, task_t, tasks);
1861 tasks_count++;
1862 if (tasks_suspend_state) {
1863 task_suspend_internal(new_task);
1864 }
1865 lck_mtx_unlock(&tasks_threads_lock);
1866 task_ref_hold_proc_task_struct(new_task);
1867
1868 return KERN_SUCCESS;
1869 }
1870
1871 /*
1872 * task_rollup_accounting_info
1873 *
1874 * Roll up accounting stats. Used to rollup stats
1875 * for exec copy task and corpse fork.
1876 */
void
task_rollup_accounting_info(task_t to_task, task_t from_task)
{
	/* Rolling a task's stats onto itself would double-count them. */
	assert(from_task != to_task);

	/* CPU time tallies maintained by the recount subsystem. */
	recount_task_copy(&to_task->tk_recount, &from_task->tk_recount);
	to_task->total_runnable_time = from_task->total_runnable_time;
	/*
	 * Scalable counters cannot be assigned directly; fold the source
	 * task's current total into the destination counter instead.
	 */
	counter_add(&to_task->faults, counter_load(&from_task->faults));
	counter_add(&to_task->pageins, counter_load(&from_task->pageins));
	counter_add(&to_task->cow_faults, counter_load(&from_task->cow_faults));
	counter_add(&to_task->messages_sent, counter_load(&from_task->messages_sent));
	counter_add(&to_task->messages_received, counter_load(&from_task->messages_received));
	/* Plain scalar statistics are copied wholesale. */
	to_task->decompressions = from_task->decompressions;
	to_task->syscalls_mach = from_task->syscalls_mach;
	to_task->syscalls_unix = from_task->syscalls_unix;
	to_task->c_switch = from_task->c_switch;
	to_task->p_switch = from_task->p_switch;
	to_task->ps_switch = from_task->ps_switch;
	to_task->extmod_statistics = from_task->extmod_statistics;
	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
	to_task->purged_memory_warn = from_task->purged_memory_warn;
	to_task->purged_memory_critical = from_task->purged_memory_critical;
	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
	/* task_io_stats is a separate allocation; copy its contents, not the pointer. */
	*to_task->task_io_stats = *from_task->task_io_stats;
	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
	to_task->task_gpu_ns = from_task->task_gpu_ns;
	/* Disk-write accounting, split by internal vs. external origin. */
	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
#if CONFIG_PHYS_WRITE_ACCT
	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */

#if CONFIG_MEMORYSTATUS
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.memorystatus_dirty_time);
#endif /* CONFIG_MEMORYSTATUS */

	/* Skip ledger roll up for memory accounting entries */
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
#if CONFIG_SCHED_SFI
	/* Roll up the wait-time ledger entry for every SFI class. */
	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
		ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
	}
#endif
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
}
1939
1940 /*
1941 * task_deallocate_internal:
1942 *
1943 * Drop a reference on a task.
1944 * Don't call this directly.
1945 */
extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
/*
 * Drop one reference on a task.  `refs` is the count remaining after the
 * caller's decrement; the task is fully torn down only when it hits zero.
 * Don't call this directly — go through task_deallocate()/_grp().
 */
void
task_deallocate_internal(
	task_t task,
	os_ref_count_t refs)
{
	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;

	if (task == TASK_NULL) {
		return;
	}

#if IMPORTANCE_INHERITANCE
	if (refs == 1) {
		/*
		 * If last ref potentially comes from the task's importance,
		 * disconnect it. But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
		 */
		if (IIT_NULL != task->task_imp_base) {
			ipc_importance_disconnect_task(task);
		}
		return;
	}
#endif /* IMPORTANCE_INHERITANCE */

	/* References remain; no teardown yet. */
	if (refs > 0) {
		return;
	}

	/*
	 * The task should be dead at this point. Ensure other resources
	 * like threads, are gone before we trash the world.
	 */
	assert(queue_empty(&task->threads));
	assert(get_bsdtask_info(task) == NULL);
	assert(!is_active(task->itk_space));
	assert(!task->active);
	assert(task->active_thread_count == 0);
	assert(!task_get_game_mode(task));

	/* Unlink from the global terminated-tasks list under the list lock. */
	lck_mtx_lock(&tasks_threads_lock);
	assert(terminated_tasks_count > 0);
	queue_remove(&terminated_tasks, task, task_t, tasks);
	terminated_tasks_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * remove the reference on bank context
	 */
	task_bank_reset(task);

	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));

	/*
	 * Give the machine dependent code a chance
	 * to perform cleanup before ripping apart
	 * the task.
	 */
	machine_task_terminate(task);

	ipc_task_terminate(task);

	/* let iokit know — phase 2 (final) termination notification */
	iokit_task_terminate(task, 2);

	/* Unregister task from userspace coredumps on panic */
	kern_unregister_userspace_coredump(task);

	if (task->affinity_space) {
		task_affinity_deallocate(task);
	}

#if MACH_ASSERT
	/* Sanity check: the task and its pmap must share a ledger when both exist. */
	if (task->ledger != NULL &&
	    task->map != NULL &&
	    task->map->pmap != NULL &&
	    task->map->pmap->ledger != NULL) {
		assert(task->ledger == task->map->pmap->ledger);
	}
#endif /* MACH_ASSERT */

	/* All owned VM objects must have been disowned before teardown. */
	vm_owned_objects_disown(task);
	assert(task->task_objects_disowned);
	if (task->task_owned_objects != 0) {
		panic("task_deallocate(%p): "
		    "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
		    task,
		    task->task_volatile_objects,
		    task->task_nonvolatile_objects,
		    task->task_owned_objects);
	}

#if CONFIG_DEFERRED_RECLAIM
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_deallocate(task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	vm_map_deallocate(task->map);
	if (task->is_large_corpse) {
		/* Release this task's slot in the global large-corpse budget. */
		assert(large_corpse_count > 0);
		OSDecrementAtomic(&large_corpse_count);
		task->is_large_corpse = false;
	}
	is_release(task->itk_space);

	if (task->t_rr_ranges) {
		restartable_ranges_release(task->t_rr_ranges);
	}

	/* Snapshot wakeup ledgers before the ledger is dereferenced below. */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    &interrupt_wakeups, &debit);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    &platform_idle_wakeups, &debit);

#if defined(CONFIG_SCHED_MULTIQ)
	sched_group_destroy(task->sched_group);
#endif

	/* Pull final CPU time totals out of recount before deinit. */
	struct recount_times_mach sum = { 0 };
	struct recount_times_mach p_only = { 0 };
	recount_task_times_perf_only(task, &sum, &p_only);
#if CONFIG_PERVASIVE_ENERGY
	uint64_t energy = recount_task_energy_nj(task);
#endif /* CONFIG_PERVASIVE_ENERGY */
	recount_task_deinit(&task->tk_recount);

	/* Accumulate statistics for dead tasks */
	lck_spin_lock(&dead_task_statistics_lock);
	dead_task_statistics.total_user_time += sum.rtm_user;
	dead_task_statistics.total_system_time += sum.rtm_system;

	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;

	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
	dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
	dead_task_statistics.total_pset_switches += task->ps_switch;
	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
#if CONFIG_PERVASIVE_ENERGY
	dead_task_statistics.task_energy += energy;
#endif /* CONFIG_PERVASIVE_ENERGY */

	lck_spin_unlock(&dead_task_statistics_lock);
	lck_mtx_destroy(&task->lock, &task_lck_grp);

	/* Fold kernel-memory ledger totals into the global tallies. */
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
	}
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
	}
	ledger_dereference(task->ledger);

	counter_free(&task->faults);
	counter_free(&task->pageins);
	counter_free(&task->cow_faults);
	counter_free(&task->messages_sent);
	counter_free(&task->messages_received);

#if CONFIG_COALITIONS
	task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */

	bzero(task->coalition, sizeof(task->coalition));

#if MACH_BSD
	/* clean up collected information since last reference to task is gone */
	if (task->corpse_info) {
		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
		task_crashinfo_destroy(task->corpse_info);
		task->corpse_info = NULL;
		kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
	}
#endif

#if CONFIG_MACF
	if (get_task_crash_label(task)) {
		mac_exc_free_label(get_task_crash_label(task));
		set_task_crash_label(task, NULL);
	}
#endif

	assert(queue_empty(&task->task_objq));
	task_objq_lock_destroy(task);

	if (task->corpse_vmobject_list) {
		kfree_data(task->corpse_vmobject_list,
		    (vm_size_t)task->corpse_vmobject_list_size);
	}

	/* Finally release the task structure itself. */
	task_ref_count_fini(task);
	proc_ro_erase_task(task->bsd_info_ro);
	task_release_proc_task_struct(task);
}
2150
2151 /*
2152 * task_name_deallocate_mig:
2153 *
2154 * Drop a reference on a task name.
2155 */
2156 void
task_name_deallocate_mig(task_name_t task_name)2157 task_name_deallocate_mig(
2158 task_name_t task_name)
2159 {
2160 return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2161 }
2162
2163 /*
2164 * task_policy_set_deallocate_mig:
2165 *
2166 * Drop a reference on a task type.
2167 */
2168 void
task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)2169 task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2170 {
2171 return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2172 }
2173
2174 /*
2175 * task_policy_get_deallocate_mig:
2176 *
2177 * Drop a reference on a task type.
2178 */
2179 void
task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)2180 task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2181 {
2182 return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2183 }
2184
2185 /*
2186 * task_inspect_deallocate_mig:
2187 *
2188 * Drop a task inspection reference.
2189 */
2190 void
task_inspect_deallocate_mig(task_inspect_t task_inspect)2191 task_inspect_deallocate_mig(
2192 task_inspect_t task_inspect)
2193 {
2194 return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2195 }
2196
2197 /*
2198 * task_read_deallocate_mig:
2199 *
2200 * Drop a reference on task read port.
2201 */
2202 void
task_read_deallocate_mig(task_read_t task_read)2203 task_read_deallocate_mig(
2204 task_read_t task_read)
2205 {
2206 return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2207 }
2208
2209 /*
2210 * task_suspension_token_deallocate:
2211 *
2212 * Drop a reference on a task suspension token.
2213 */
2214 void
task_suspension_token_deallocate(task_suspension_token_t token)2215 task_suspension_token_deallocate(
2216 task_suspension_token_t token)
2217 {
2218 return task_deallocate((task_t)token);
2219 }
2220
2221 void
task_suspension_token_deallocate_grp(task_suspension_token_t token,task_grp_t grp)2222 task_suspension_token_deallocate_grp(
2223 task_suspension_token_t token,
2224 task_grp_t grp)
2225 {
2226 return task_deallocate_grp((task_t)token, grp);
2227 }
2228
2229 /*
2230 * task_collect_crash_info:
2231 *
2232 * collect crash info from bsd and mach based data
2233 */
2234 kern_return_t
task_collect_crash_info(task_t task,struct label * crash_label,int is_corpse_fork)2235 task_collect_crash_info(
2236 task_t task,
2237 #ifdef CONFIG_MACF
2238 struct label *crash_label,
2239 #endif
2240 int is_corpse_fork)
2241 {
2242 kern_return_t kr = KERN_SUCCESS;
2243
2244 kcdata_descriptor_t crash_data = NULL;
2245 kcdata_descriptor_t crash_data_release = NULL;
2246 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2247 mach_vm_offset_t crash_data_ptr = 0;
2248 void *crash_data_kernel = NULL;
2249 void *crash_data_kernel_release = NULL;
2250 #if CONFIG_MACF
2251 struct label *label, *free_label;
2252 #endif
2253
2254 if (!corpses_enabled()) {
2255 return KERN_NOT_SUPPORTED;
2256 }
2257
2258 #if CONFIG_MACF
2259 free_label = label = mac_exc_create_label(NULL);
2260 #endif
2261
2262 task_lock(task);
2263
2264 assert(is_corpse_fork || get_bsdtask_info(task) != NULL);
2265 if (task->corpse_info == NULL && (is_corpse_fork || get_bsdtask_info(task) != NULL)) {
2266 #if CONFIG_MACF
2267 /* Set the crash label, used by the exception delivery mac hook */
2268 free_label = get_task_crash_label(task); // Most likely NULL.
2269 set_task_crash_label(task, label);
2270 mac_exc_update_task_crash_label(task, crash_label);
2271 #endif
2272 task_unlock(task);
2273
2274 crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2275 Z_WAITOK | Z_ZERO);
2276 if (crash_data_kernel == NULL) {
2277 kr = KERN_RESOURCE_SHORTAGE;
2278 goto out_no_lock;
2279 }
2280 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2281
2282 /* Do not get a corpse ref for corpse fork */
2283 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2284 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2285 KCFLAG_USE_MEMCOPY);
2286 if (crash_data) {
2287 task_lock(task);
2288 crash_data_release = task->corpse_info;
2289 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2290 task->corpse_info = crash_data;
2291
2292 task_unlock(task);
2293 kr = KERN_SUCCESS;
2294 } else {
2295 kfree_data(crash_data_kernel,
2296 CORPSEINFO_ALLOCATION_SIZE);
2297 kr = KERN_FAILURE;
2298 }
2299
2300 if (crash_data_release != NULL) {
2301 task_crashinfo_destroy(crash_data_release);
2302 }
2303 kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2304 } else {
2305 task_unlock(task);
2306 }
2307
2308 out_no_lock:
2309 #if CONFIG_MACF
2310 if (free_label != NULL) {
2311 mac_exc_free_label(free_label);
2312 }
2313 #endif
2314 return kr;
2315 }
2316
2317 /*
2318 * task_deliver_crash_notification:
2319 *
2320 * Makes outcall to registered host port for a corpse.
2321 */
kern_return_t
task_deliver_crash_notification(
	task_t corpse, /* corpse or corpse fork */
	thread_t thread,
	exception_type_t etype,
	mach_exception_subcode_t subcode)
{
	kcdata_descriptor_t crash_info = corpse->corpse_info;
	thread_t th_iter = NULL;
	kern_return_t kr = KERN_SUCCESS;
	wait_interrupt_t wsave;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	ipc_port_t corpse_port;

	/* Nothing to deliver without collected crash info. */
	if (crash_info == NULL) {
		return KERN_FAILURE;
	}

	assert(task_is_a_corpse(corpse));

	task_lock(corpse);

	/*
	 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
	 * Crash reporters should derive whether it's fatal from corpse blob.
	 */
	code[0] = etype;
	code[1] = subcode;

	/*
	 * Reset the IPC state of every thread that is not a corpse
	 * duplicate before sending the notification.
	 */
	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
	{
		if (th_iter->corpse_dup == FALSE) {
			ipc_thread_reset(th_iter);
		}
	}
	task_unlock(corpse);

	/* Arm the no-sender notification for taskport */
	task_reference(corpse);
	corpse_port = convert_corpse_to_port_and_nsrequest(corpse);

	/* Deliver the exception uninterruptibly, then restore the old level. */
	wsave = thread_interrupt_level(THREAD_UNINT);
	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
	if (kr != KERN_SUCCESS) {
		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(corpse));
	}

	(void)thread_interrupt_level(wsave);

	/*
	 * Drop the send right on corpse port, will fire the
	 * no-sender notification if exception deliver failed.
	 */
	ipc_port_release_send(corpse_port);
	return kr;
}
2378
2379 /*
2380 * task_terminate:
2381 *
2382 * Terminate the specified task. See comments on thread_terminate
2383 * (kern/thread.c) about problems with terminating the "current task."
2384 */
2385
2386 kern_return_t
task_terminate(task_t task)2387 task_terminate(
2388 task_t task)
2389 {
2390 if (task == TASK_NULL) {
2391 return KERN_INVALID_ARGUMENT;
2392 }
2393
2394 if (get_bsdtask_info(task)) {
2395 return KERN_FAILURE;
2396 }
2397
2398 return task_terminate_internal(task);
2399 }
2400
2401 #if MACH_ASSERT
2402 extern int proc_pid(struct proc *);
2403 extern void proc_name_kdp(struct proc *p, char *buf, int size);
2404 #endif /* MACH_ASSERT */
2405
2406 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2407 static void
task_partial_reap(task_t task,__unused int pid)2408 __unused task_partial_reap(task_t task, __unused int pid)
2409 {
2410 unsigned int reclaimed_resident = 0;
2411 unsigned int reclaimed_compressed = 0;
2412 uint64_t task_page_count;
2413
2414 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2415
2416 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2417 pid, task_page_count, 0, 0, 0);
2418
2419 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2420
2421 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2422 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2423 }
2424
2425 /*
2426 * task_mark_corpse:
2427 *
2428 * Mark the task as a corpse. Called by crashing thread.
2429 */
2430 kern_return_t
task_mark_corpse(task_t task)2431 task_mark_corpse(task_t task)
2432 {
2433 kern_return_t kr = KERN_SUCCESS;
2434 thread_t self_thread;
2435 (void) self_thread;
2436 wait_interrupt_t wsave;
2437 #if CONFIG_MACF
2438 struct label *crash_label = NULL;
2439 #endif
2440
2441 assert(task != kernel_task);
2442 assert(task == current_task());
2443 assert(!task_is_a_corpse(task));
2444
2445 #if CONFIG_MACF
2446 crash_label = mac_exc_create_label_for_proc((struct proc*)get_bsdtask_info(task));
2447 #endif
2448
2449 kr = task_collect_crash_info(task,
2450 #if CONFIG_MACF
2451 crash_label,
2452 #endif
2453 FALSE);
2454 if (kr != KERN_SUCCESS) {
2455 goto out;
2456 }
2457
2458 self_thread = current_thread();
2459
2460 wsave = thread_interrupt_level(THREAD_UNINT);
2461 task_lock(task);
2462
2463 /*
2464 * Check if any other thread called task_terminate_internal
2465 * and made the task inactive before we could mark it for
2466 * corpse pending report. Bail out if the task is inactive.
2467 */
2468 if (!task->active) {
2469 kcdata_descriptor_t crash_data_release = task->corpse_info;;
2470 void *crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);;
2471
2472 task->corpse_info = NULL;
2473 task_unlock(task);
2474
2475 if (crash_data_release != NULL) {
2476 task_crashinfo_destroy(crash_data_release);
2477 }
2478 kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2479 return KERN_TERMINATED;
2480 }
2481
2482 task_set_corpse_pending_report(task);
2483 task_set_corpse(task);
2484 task->crashed_thread_id = thread_tid(self_thread);
2485
2486 kr = task_start_halt_locked(task, TRUE);
2487 assert(kr == KERN_SUCCESS);
2488
2489 task_set_uniqueid(task);
2490
2491 task_unlock(task);
2492
2493 /*
2494 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2495 * disable old ports here instead.
2496 *
2497 * The vm_map and ipc_space must exist until this function returns,
2498 * convert_port_to_{map,space}_with_flavor relies on this behavior.
2499 */
2500 ipc_task_disable(task);
2501
2502 /* let iokit know 1 */
2503 iokit_task_terminate(task, 1);
2504
2505 /* terminate the ipc space */
2506 ipc_space_terminate(task->itk_space);
2507
2508 /* Add it to global corpse task list */
2509 task_add_to_corpse_task_list(task);
2510
2511 thread_terminate_internal(self_thread);
2512
2513 (void) thread_interrupt_level(wsave);
2514 assert(task->halting == TRUE);
2515
2516 out:
2517 #if CONFIG_MACF
2518 mac_exc_free_label(crash_label);
2519 #endif
2520 return kr;
2521 }
2522
2523 /*
2524 * task_set_uniqueid
2525 *
2526 * Set task uniqueid to systemwide unique 64 bit value
2527 */
void
task_set_uniqueid(task_t task)
{
	/* Atomically draw the next value from the global counter. */
	task->task_uniqueid = OSIncrementAtomic64(&next_taskuniqueid);
}
2533
2534 /*
2535 * task_clear_corpse
2536 *
2537 * Clears the corpse pending bit on task.
2538 * Removes inspection bit on the threads.
2539 */
void
task_clear_corpse(task_t task)
{
	thread_t th_iter = NULL;

	task_lock(task);
	/*
	 * Walk every thread: clear its inspection bit and disable its
	 * IPC under the per-thread mutex (taken inside the task lock).
	 */
	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
	{
		thread_mtx_lock(th_iter);
		th_iter->inspection = FALSE;
		ipc_thread_disable(th_iter);
		thread_mtx_unlock(th_iter);
	}

	thread_terminate_crashed_threads();
	/* remove the pending corpse report flag */
	task_clear_corpse_pending_report(task);

	task_unlock(task);
}
2560
2561 /*
2562 * task_port_no_senders
2563 *
2564 * Called whenever the Mach port system detects no-senders on
2565 * the task port of a corpse.
2566 * Each notification that comes in should terminate the task (corpse).
2567 */
static void
task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
{
	/* Resolve the kobject back to its task; port is a corpse's control port. */
	task_t task = ipc_kobject_get_locked(port, IKOT_TASK_CONTROL);

	assert(task != TASK_NULL);
	assert(task_is_a_corpse(task));

	/* Remove the task from global corpse task list */
	task_remove_from_corpse_task_list(task);

	/* Undo the corpse markers so termination can proceed normally. */
	task_clear_corpse(task);
	vm_map_unset_corpse_source(task->map);
	task_terminate_internal(task);
}
2583
2584 /*
2585 * task_port_with_flavor_no_senders
2586 *
2587 * Called whenever the Mach port system detects no-senders on
2588 * the task inspect or read port. These ports are allocated lazily and
2589 * should be deallocated here when there are no senders remaining.
2590 */
static void
task_port_with_flavor_no_senders(
	ipc_port_t port,
	mach_port_mscount_t mscount __unused)
{
	task_t task;
	mach_task_flavor_t flavor;
	ipc_kobject_type_t kotype;

	ip_mq_lock(port);
	if (port->ip_srights > 0) {
		/* Send rights reappeared; this notification is stale. */
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
	task = ipc_kobject_get_locked(port, kotype);
	if (task != TASK_NULL) {
		/* Keep the task alive while we operate on it unlocked. */
		task_reference(task);
	}
	ip_mq_unlock(port);

	if (task == TASK_NULL) {
		/* The task is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	/* Map the kobject type back to the corresponding task port flavor. */
	if (kotype == IKOT_TASK_READ) {
		flavor = TASK_FLAVOR_READ;
	} else {
		flavor = TASK_FLAVOR_INSPECT;
	}

	/* Lock order: itk lock before the port lock. */
	itk_lock(task);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_task_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	if (!ip_active(port) ||
	    task->itk_task_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		itk_unlock(task);
		task_deallocate(task);
		return;
	}

	assert(task->itk_task_ports[flavor] == port);
	task->itk_task_ports[flavor] = IP_NULL;
	itk_unlock(task);

	/* Destroy the lazily-allocated port; consumes the port lock taken above. */
	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	task_deallocate(task);
}
2657
2658 /*
2659 * task_wait_till_threads_terminate_locked
2660 *
2661 * Wait till all the threads in the task are terminated.
2662 * Might release the task lock and re-acquire it.
2663 */
2664 void
task_wait_till_threads_terminate_locked(task_t task)2665 task_wait_till_threads_terminate_locked(task_t task)
2666 {
2667 /* wait for all the threads in the task to terminate */
2668 while (task->active_thread_count != 0) {
2669 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2670 task_unlock(task);
2671 thread_block(THREAD_CONTINUE_NULL);
2672
2673 task_lock(task);
2674 }
2675 }
2676
2677 /*
2678 * task_duplicate_map_and_threads
2679 *
2680 * Copy vmmap of source task.
2681 * Copy active threads from source task to destination task.
2682 * Source task would be suspended during the copy.
2683 */
/*
 * Out-params on KERN_SUCCESS: thread_ret is the corpse thread mirroring
 * the caller's thread (or the first duplicated thread as a fallback);
 * udata_buffer/size/num_udata describe a kalloc'd array of kqueue udata
 * pointers whose ownership passes to the caller.
 */
kern_return_t
task_duplicate_map_and_threads(
	task_t task,
	void *p,
	task_t new_task,
	thread_t *thread_ret,
	uint64_t **udata_buffer,
	int *size,
	int *num_udata,
	bool for_exception)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state, use the internal
	 * variant so that no user-space process can resume
	 * the task from under us
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		ktriage_record(thread_tid(self), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_CORPSE, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_CORPSE_FAIL_LIBGMALLOC), 0 /* arg */);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	bool is_large = false;
	if (memorystatus_allowed_vm_map_fork(task, &is_large)) {
		/* Setup new task's vmmap, switch from parent task's map to it COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		if (new_task->map) {
			new_task->is_large_corpse = is_large;
			vm_map_deallocate(oldmap);

			/* copy ledgers that impact the memory footprint */
			vm_map_copy_footprint_ledgers(task, new_task);

			/* Get all the udata pointers from kqueue */
			est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
			if (est_knotes > 0) {
				/* Pad the estimate: knotes may be added concurrently. */
				buf_size = (est_knotes + 32) * sizeof(uint64_t);
				buffer = kalloc_data(buf_size, Z_WAITOK);
				num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
				if (num_knotes > est_knotes + 32) {
					num_knotes = est_knotes + 32;
				}
			}
		} else {
			/* vm_map_fork failed: restore the old map and bail out. */
			if (is_large) {
				assert(large_corpse_count > 0);
				OSDecrementAtomic(&large_corpse_count);
			}
			new_task->map = oldmap;
#if DEVELOPMENT || DEBUG
			memorystatus_abort_vm_map_fork(task);
#endif
			task_resume_internal(task);
			return KERN_NO_SPACE;
		}
	} else if (!for_exception) {
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_NO_SPACE;
	}

	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		/* Nothing to duplicate; buffer may already be allocated above. */
		kfree_data(buffer, buf_size);
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		if (array_count >= active_thread_count) {
			break;
		}

		/* Take a ref so the thread survives after the task lock is dropped. */
		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	for (i = 0; i < array_count; i++) {
		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of current thread in corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			/* Mark the failed duplicate but keep copying the remaining threads. */
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(get_bsdthread_info(new_thread),
		    get_bsdthread_info(thread_array[i]));
		new_thread->thread_tag = thread_array[i]->thread_tag &
		    ~THREAD_TAG_USER_JOIN;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	/* Drop the references taken while snapshotting the source thread list. */
	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree_type(thread_t, active_thread_count, thread_array);

	if (kr == KERN_SUCCESS) {
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		/* On failure, release everything we were going to hand back. */
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		kfree_data(buffer, buf_size);
	}

	return kr;
}
2871
#if CONFIG_SECLUDED_MEMORY
extern void task_set_can_use_secluded_mem_locked(
	task_t task,
	boolean_t can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */

#if MACH_ASSERT
/* Debug knob: panic in task_terminate_internal() for sub-PAGE_SHIFT maps. */
int debug4k_panic_on_terminate = 0;
#endif /* MACH_ASSERT */
kern_return_t
task_terminate_internal(
	task_t task)
{
	thread_t thread, self;
	task_t self_task;
	boolean_t interrupt_save;
	int pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = current_task();

	/*
	 * Get the task locked and make sure that we are not racing
	 * with someone else trying to terminate us.
	 * Both locks are taken in address order to avoid deadlock.
	 */
	if (task == self_task) {
		task_lock(task);
	} else if (task < self_task) {
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	/* Relinquish any claim on secluded memory before tearing down. */
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 * Task is already being terminated.
		 * Just return an error. If we are dying, this will
		 * just get us to our AST special handler and that
		 * will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (task_corpse_pending_report(task)) {
		/*
		 * Task is marked for reporting as corpse.
		 * Just return an error. This will
		 * just get us to our AST special handler and that
		 * will get us to finish the path to death
		 */
		task_unlock(task);
		if (self_task != task) {
			task_unlock(self_task);
		}

		return KERN_FAILURE;
	}

	if (self_task != task) {
		task_unlock(self_task);
	}

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 * Indicate that we want all the threads to stop executing
	 * at user space by holding the task (we would have held
	 * each thread independently in thread_terminate_internal -
	 * but this way we may be more likely to already find it
	 * held there). Mark the task inactive, and prevent
	 * further task operations via the task port.
	 *
	 * The vm_map and ipc_space must exist until this function returns,
	 * convert_port_to_{map,space}_with_flavor relies on this behavior.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 * Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	/* Capture the pid while the task lock is still held. */
	void *bsd_info = get_bsdtask_info(task);
	if (bsd_info != NULL) {
		pid = proc_pid(bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	/* Flag the task as terminated for the task policy engine. */
	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

	// PR-17045188: Revisit implementation
	// task_partial_reap(task, pid);

#if CONFIG_TASKWATCH
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_TASKWATCH */

	/*
	 * Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 * Clear the watchport boost on the task.
	 */
	task_remove_turnstile_watchports(task);

	/* let iokit know 1 */
	iokit_task_terminate(task, 1);

	/*
	 * Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

#if CONFIG_DEFERRED_RECLAIM
	/*
	 * Remove this tasks reclaim buffer from global queues.
	 */
	if (task->deferred_reclamation_metadata != NULL) {
		vm_deferred_reclamation_buffer_uninstall(task->deferred_reclamation_metadata);
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped. To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explictly here.
	 */

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	void *proc = get_bsdtask_info(task);
	if (proc) {
		pid = proc_pid(proc);
		proc_name_kdp(proc, procname, sizeof(procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof(procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
		DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
		if (debug4k_panic_on_terminate) {
			panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
		}
	}
#endif /* MACH_ASSERT */

	vm_map_terminate(task->map);

	/* release our shared region */
	vm_shared_region_set(task, NULL);

#if __has_feature(ptrauth_calls)
	task_set_shared_region_id(task, NULL);
#endif /* __has_feature(ptrauth_calls) */

	/* Move the task from the live list to the terminated list. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if KPC
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
		kpc_force_all_ctrs(task, 0);
	}
#endif /* KPC */

#if CONFIG_COALITIONS
	/*
	 * Leave the coalition for corpse task or task that
	 * never had any active threads (e.g. fork, exec failure).
	 * For task with active threads, the task will be removed
	 * from coalition by last terminating thread.
	 */
	if (task->active_thread_count == 0) {
		coalitions_remove_task(task);
	}
#endif

#if CONFIG_FREEZE
	extern int vm_compressor_available;
	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
		task_disown_frozen_csegs(task);
		assert(queue_empty(&task->task_frozen_cseg_q));
	}
#endif /* CONFIG_FREEZE */


	/*
	 * Get rid of the task active reference on itself.
	 */
	task_deallocate_grp(task, TASK_GRP_INTERNAL);

	return KERN_SUCCESS;
}
3145
3146 void
tasks_system_suspend(boolean_t suspend)3147 tasks_system_suspend(boolean_t suspend)
3148 {
3149 task_t task;
3150
3151 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
3152 (suspend ? DBG_FUNC_START : DBG_FUNC_END));
3153
3154 lck_mtx_lock(&tasks_threads_lock);
3155 assert(tasks_suspend_state != suspend);
3156 tasks_suspend_state = suspend;
3157 queue_iterate(&tasks, task, task_t, tasks) {
3158 if (task == kernel_task) {
3159 continue;
3160 }
3161 suspend ? task_suspend_internal(task) : task_resume_internal(task);
3162 }
3163 lck_mtx_unlock(&tasks_threads_lock);
3164 }
3165
3166 /*
3167 * task_start_halt:
3168 *
3169 * Shut the current task down (except for the current thread) in
3170 * preparation for dramatic changes to the task (probably exec).
3171 * We hold the task and mark all other threads in the task for
3172 * termination.
3173 */
3174 kern_return_t
task_start_halt(task_t task)3175 task_start_halt(task_t task)
3176 {
3177 kern_return_t kr = KERN_SUCCESS;
3178 task_lock(task);
3179 kr = task_start_halt_locked(task, FALSE);
3180 task_unlock(task);
3181 return kr;
3182 }
3183
static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	/* Only the caller's own task (or a corpse fork) may be halted here. */
	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!should_mark_corpse &&
	    (task->halting || !task->active || !self->active)) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves. If should_mark_corpse is set, the corpse
		 * creation might have raced with exec, let the corpse
		 * creation continue, once the current thread reaches AST
		 * thread in exec will be woken up from task_complete_halt.
		 * Exec will fail cause the proc was marked for exit.
		 * Once the thread in exec reaches AST, it will call proc_exit
		 * and deliver the EXC_CORPSE_NOTIFY.
		 */
		return KERN_FAILURE;
	}

	/* Thread creation will fail after this point of no return. */
	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);
	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));

	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		/*
		 * Remove priority throttles for threads to terminate timely. This has
		 * to be done after task_hold_locked() traps all threads to AST, but before
		 * threads are marked inactive in thread_terminate_internal(). Takes thread
		 * mutex lock.
		 *
		 * We need task_is_a_corpse() check so that we don't accidently update policy
		 * for tasks that are doing posix_spawn().
		 *
		 * See: thread_policy_update_tasklocked().
		 */
		if (task_is_a_corpse(task)) {
			proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
		}

		if (should_mark_corpse) {
			/* Make the thread visible to corpse inspection. */
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		if (thread != self) {
			/* The current thread is spared so it can finish the halt. */
			thread_terminate_internal(thread);
		}
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	task_release_locked(task);

	return KERN_SUCCESS;
}
3263
3264
3265 /*
3266 * task_complete_halt:
3267 *
3268 * Complete task halt by waiting for threads to terminate, then clean
3269 * up task resources (VM, port namespace, etc...) and then let the
3270 * current thread go in the (practically empty) task context.
3271 *
3272 * Note: task->halting flag is not cleared in order to avoid creation
3273 * of new thread in old exec'ed task.
3274 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 * Wait for the other threads to get shut down.
	 * When the last other thread is reaped, we'll be
	 * woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		task_unlock(task);
	}

#if CONFIG_DEFERRED_RECLAIM
	/* Detach and free the deferred-reclamation buffer, if present. */
	if (task->deferred_reclamation_metadata) {
		vm_deferred_reclamation_buffer_uninstall(
			task->deferred_reclamation_metadata);
		vm_deferred_reclamation_buffer_deallocate(
			task->deferred_reclamation_metadata);
		task->deferred_reclamation_metadata = NULL;
	}
#endif /* CONFIG_DEFERRED_RECLAIM */

	/*
	 * Give the machine dependent code a chance
	 * to perform cleanup of task-level resources
	 * associated with the current thread before
	 * ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 * Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/* let iokit know 1 */
	iokit_task_terminate(task, 1);

	/*
	 * Terminate the IPC space. A long time ago,
	 * this used to be ipc_space_clean() which would
	 * keep the space active but hollow it.
	 *
	 * We really do not need this semantics given
	 * tasks die with exec now.
	 */
	ipc_space_terminate(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_terminate(task->map);

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	/* let iokit know 2 */
	iokit_task_terminate(task, 2);
}
3344
3345 #ifdef CONFIG_TASK_SUSPEND_STATS
3346
static void
_task_mark_suspend_source(task_t task)
{
	int idx;
	task_suspend_stats_t stats;
	task_suspend_source_t source;
	task_lock_assert_owned(task);
	stats = &task->t_suspend_stats;

	/* Ring buffer: once full, overwrite the oldest entry. */
	idx = stats->tss_count % TASK_SUSPEND_SOURCES_MAX;
	source = &task->t_suspend_sources[idx];
	bzero(source, sizeof(*source));

	/* Record who suspended this task and when. */
	source->tss_time = mach_absolute_time();
	source->tss_tid = current_thread()->thread_id;
	source->tss_pid = task_pid(current_task());
	task_best_name(current_task(), source->tss_procname, sizeof(source->tss_procname));

	stats->tss_count++;
}
3367
static inline void
_task_mark_suspend_start(task_t task)
{
	task_lock_assert_owned(task);
	/* Timestamp the moment the task entered suspension. */
	task->t_suspend_stats.tss_last_start = mach_absolute_time();
}
3374
static inline void
_task_mark_suspend_end(task_t task)
{
	task_lock_assert_owned(task);
	/* Accumulate the duration of the suspension that just ended. */
	task->t_suspend_stats.tss_last_end = mach_absolute_time();
	task->t_suspend_stats.tss_duration += (task->t_suspend_stats.tss_last_end -
	    task->t_suspend_stats.tss_last_start);
}
3383
3384 static kern_return_t
_task_get_suspend_stats_locked(task_t task,task_suspend_stats_t stats)3385 _task_get_suspend_stats_locked(task_t task, task_suspend_stats_t stats)
3386 {
3387 if (task == TASK_NULL || stats == NULL) {
3388 return KERN_INVALID_ARGUMENT;
3389 }
3390 task_lock_assert_owned(task);
3391 memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3392 return KERN_SUCCESS;
3393 }
3394
static kern_return_t
_task_get_suspend_sources_locked(task_t task, task_suspend_source_t sources)
{
	if (task == TASK_NULL || sources == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	task_lock_assert_owned(task);
	/* Copy the whole ring of recorded suspenders into the caller's array. */
	memcpy(sources, task->t_suspend_sources,
	    sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
	return KERN_SUCCESS;
}
3406
3407 #endif /* CONFIG_TASK_SUSPEND_STATS */
3408
3409 kern_return_t
task_get_suspend_stats(task_t task,task_suspend_stats_t stats)3410 task_get_suspend_stats(task_t task, task_suspend_stats_t stats)
3411 {
3412 #ifdef CONFIG_TASK_SUSPEND_STATS
3413 kern_return_t kr;
3414 if (task == TASK_NULL || stats == NULL) {
3415 return KERN_INVALID_ARGUMENT;
3416 }
3417 task_lock(task);
3418 kr = _task_get_suspend_stats_locked(task, stats);
3419 task_unlock(task);
3420 return kr;
3421 #else /* CONFIG_TASK_SUSPEND_STATS */
3422 (void)task;
3423 (void)stats;
3424 return KERN_NOT_SUPPORTED;
3425 #endif
3426 }
3427
kern_return_t
task_get_suspend_stats_kdp(task_t task, task_suspend_stats_t stats)
{
#ifdef CONFIG_TASK_SUSPEND_STATS
	if (task == TASK_NULL || stats == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	/*
	 * Lockless copy — presumably for debugger/KDP context where taking
	 * the task lock is not possible; the snapshot may be torn. Verify
	 * against callers.
	 */
	memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
	return KERN_SUCCESS;
#else /* CONFIG_TASK_SUSPEND_STATS */
#pragma unused(task, stats)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_TASK_SUSPEND_STATS */
}
3442
3443 kern_return_t
task_get_suspend_sources(task_t task,task_suspend_source_array_t sources)3444 task_get_suspend_sources(task_t task, task_suspend_source_array_t sources)
3445 {
3446 #ifdef CONFIG_TASK_SUSPEND_STATS
3447 kern_return_t kr;
3448 if (task == TASK_NULL || sources == NULL) {
3449 return KERN_INVALID_ARGUMENT;
3450 }
3451 task_lock(task);
3452 kr = _task_get_suspend_sources_locked(task, sources);
3453 task_unlock(task);
3454 return kr;
3455 #else /* CONFIG_TASK_SUSPEND_STATS */
3456 (void)task;
3457 (void)sources;
3458 return KERN_NOT_SUPPORTED;
3459 #endif
3460 }
3461
kern_return_t
task_get_suspend_sources_kdp(task_t task, task_suspend_source_array_t sources)
{
#ifdef CONFIG_TASK_SUSPEND_STATS
	if (task == TASK_NULL || sources == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	/*
	 * Lockless copy — presumably for debugger/KDP context where locks
	 * cannot be taken; the snapshot may be torn. Verify against callers.
	 */
	memcpy(sources, task->t_suspend_sources,
	    sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
	return KERN_SUCCESS;
#else /* CONFIG_TASK_SUSPEND_STATS */
#pragma unused(task, sources)
	return KERN_NOT_SUPPORTED;
#endif
}
3477
3478 /*
3479 * task_hold_locked:
3480 *
3481 * Suspend execution of the specified task.
3482 * This is a recursive-style suspension of the task, a count of
3483 * suspends is maintained.
3484 *
3485 * CONDITIONS: the task is locked and active.
3486 */
void
task_hold_locked(
	task_t task)
{
	thread_t thread;
	void *bsd_info = get_bsdtask_info(task);

	assert(task->active);

	/* Nested suspends only bump the count; threads are already held. */
	if (task->suspend_count++ > 0) {
		return;
	}

	if (bsd_info) {
		/* Notify the workqueue subsystem that the process is suspended. */
		workq_proc_suspended(bsd_info);
	}

	/*
	 * Iterate through all the threads and hold them.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_hold(thread);
		thread_mtx_unlock(thread);
	}

#ifdef CONFIG_TASK_SUSPEND_STATS
	_task_mark_suspend_start(task);
#endif
}
3517
3518 /*
3519 * task_hold:
3520 *
3521 * Same as the internal routine above, except that is must lock
3522 * and verify that the task is active. This differs from task_suspend
3523 * in that it places a kernel hold on the task rather than just a
3524 * user-level hold. This keeps users from over resuming and setting
3525 * it running out from under the kernel.
3526 *
3527 * CONDITIONS: the caller holds a reference on the task
3528 */
3529 kern_return_t
task_hold(task_t task)3530 task_hold(
3531 task_t task)
3532 {
3533 if (task == TASK_NULL) {
3534 return KERN_INVALID_ARGUMENT;
3535 }
3536
3537 task_lock(task);
3538
3539 if (!task->active) {
3540 task_unlock(task);
3541
3542 return KERN_FAILURE;
3543 }
3544
3545 #ifdef CONFIG_TASK_SUSPEND_STATS
3546 _task_mark_suspend_source(task);
3547 #endif /* CONFIG_TASK_SUSPEND_STATS */
3548 task_hold_locked(task);
3549 task_unlock(task);
3550
3551 return KERN_SUCCESS;
3552 }
3553
3554 kern_return_t
task_wait(task_t task,boolean_t until_not_runnable)3555 task_wait(
3556 task_t task,
3557 boolean_t until_not_runnable)
3558 {
3559 if (task == TASK_NULL) {
3560 return KERN_INVALID_ARGUMENT;
3561 }
3562
3563 task_lock(task);
3564
3565 if (!task->active) {
3566 task_unlock(task);
3567
3568 return KERN_FAILURE;
3569 }
3570
3571 task_wait_locked(task, until_not_runnable);
3572 task_unlock(task);
3573
3574 return KERN_SUCCESS;
3575 }
3576
3577 /*
3578 * task_wait_locked:
3579 *
3580 * Wait for all threads in task to stop.
3581 *
3582 * Conditions:
3583 * Called with task locked, active, and held.
3584 */
void
task_wait_locked(
	task_t task,
	boolean_t until_not_runnable)
{
	thread_t thread, self;

	assert(task->active);
	assert(task->suspend_count > 0);

	self = current_thread();

	/*
	 * Iterate through all the threads and wait for them to
	 * stop. Do not wait for the current thread if it is within
	 * the task.
	 *
	 * until_not_runnable is passed through to thread_wait();
	 * presumably it also waits for the thread to get off-CPU,
	 * not merely to be stopped — confirm against thread_wait().
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != self) {
			thread_wait(thread, until_not_runnable);
		}
	}
}
3608
boolean_t
task_is_app_suspended(task_t task)
{
	/* True when the task was suspended via the pidsuspend path. */
	return task->pidsuspended;
}
3614
3615 /*
3616 * task_release_locked:
3617 *
3618 * Release a kernel hold on a task.
3619 *
3620 * CONDITIONS: the task is locked and active
3621 */
void
task_release_locked(
	task_t task)
{
	thread_t thread;
	void *bsd_info = get_bsdtask_info(task);

	assert(task->active);
	assert(task->suspend_count > 0);

	/* Drop one kernel-level hold; only the final release resumes anything. */
	if (--task->suspend_count > 0) {
		return;
	}

	/* Tell the BSD workqueue subsystem the process is runnable again. */
	if (bsd_info) {
		workq_proc_resumed(bsd_info);
	}

	/* Release the per-thread holds placed when the task was held. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_release(thread);
		thread_mtx_unlock(thread);
	}

#if CONFIG_TASK_SUSPEND_STATS
	_task_mark_suspend_end(task);
#endif
}
3650
3651 /*
3652 * task_release:
3653 *
3654 * Same as the internal routine above, except that it must lock
3655 * and verify that the task is active.
3656 *
3657 * CONDITIONS: The caller holds a reference to the task
3658 */
3659 kern_return_t
task_release(task_t task)3660 task_release(
3661 task_t task)
3662 {
3663 if (task == TASK_NULL) {
3664 return KERN_INVALID_ARGUMENT;
3665 }
3666
3667 task_lock(task);
3668
3669 if (!task->active) {
3670 task_unlock(task);
3671
3672 return KERN_FAILURE;
3673 }
3674
3675 task_release_locked(task);
3676 task_unlock(task);
3677
3678 return KERN_SUCCESS;
3679 }
3680
/*
 * task_threads_internal:
 *
 * Return an array covering every thread in the task, converted to
 * ports of the requested flavor.  Ownership of the kalloc'ed array
 * is transferred to the caller via *threads_out.
 *
 * CONDITIONS: nothing locked on entry; caller holds a task reference.
 */
static kern_return_t
task_threads_internal(
	task_t                  task,
	thread_act_array_t      *threads_out,
	mach_msg_type_number_t  *countp,
	mach_thread_flavor_t    flavor)
{
	mach_msg_type_number_t actual, count, count_needed;
	thread_t *thread_list;
	thread_t thread;
	unsigned int i;

	count = 0;
	thread_list = NULL;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(flavor <= THREAD_FLAVOR_INSPECT);

	/*
	 * The array must be allocated without holding the task lock, but
	 * the thread count can change while unlocked — loop until the
	 * allocation is large enough for the count seen under the lock.
	 */
	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			kfree_type(thread_t, count, thread_list);
			return KERN_FAILURE;
		}

		count_needed = actual = task->thread_count;
		if (count_needed <= count) {
			break;
		}

		/* unlock the task and allocate more memory */
		task_unlock(task);

		kfree_type(thread_t, count, thread_list);
		count = count_needed;
		thread_list = kalloc_type(thread_t, count, Z_WAITOK);

		if (thread_list == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* Task lock is held here: snapshot the threads, taking a ref on each. */
	i = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		assert(i < actual);
		thread_reference(thread);
		thread_list[i++] = thread;
	}

	count_needed = actual;

	/* can unlock task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return null pointer and deallocate memory */

		*threads_out = NULL;
		*countp = 0;
		kfree_type(thread_t, count, thread_list);
	} else {
		/* if we allocated too much, must copy */
		if (count_needed < count) {
			void *newaddr;

			newaddr = kalloc_type(thread_t, count_needed, Z_WAITOK);
			if (newaddr == NULL) {
				/* Drop the refs taken above before bailing out. */
				for (i = 0; i < actual; ++i) {
					thread_deallocate(thread_list[i]);
				}
				kfree_type(thread_t, count, thread_list);
				return KERN_RESOURCE_SHORTAGE;
			}

			bcopy(thread_list, newaddr, count_needed * sizeof(thread_t));
			kfree_type(thread_t, count, thread_list);
			thread_list = (thread_t *)newaddr;
		}

		*threads_out = thread_list;
		*countp = actual;

		/*
		 * do the conversion that Mig should handle: rewrite each
		 * thread pointer in-place as a port of the requested flavor.
		 */

		switch (flavor) {
		case THREAD_FLAVOR_CONTROL:
			if (task == current_task()) {
				/* Own-task control ports use the pinned variant. */
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
				}
			} else {
				for (i = 0; i < actual; ++i) {
					((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
				}
			}
			break;
		case THREAD_FLAVOR_READ:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
			}
			break;
		case THREAD_FLAVOR_INSPECT:
			for (i = 0; i < actual; ++i) {
				((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
			}
			break;
		}
	}

	return KERN_SUCCESS;
}
3797
3798 kern_return_t
task_threads(task_t task,thread_act_array_t * threads_out,mach_msg_type_number_t * count)3799 task_threads(
3800 task_t task,
3801 thread_act_array_t *threads_out,
3802 mach_msg_type_number_t *count)
3803 {
3804 return task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3805 }
3806
3807
/*
 * task_threads_from_user:
 *
 * MIG entry: enumerate the task's threads, returning thread ports of
 * the same flavor (control/read/inspect) as the supplied task port.
 */
kern_return_t
task_threads_from_user(
	mach_port_t             port,
	thread_act_array_t      *threads_out,
	mach_msg_type_number_t  *count)
{
	ipc_kobject_type_t kotype;
	kern_return_t kr;

	/* Accepts any task-port flavor; fails only for non-task ports. */
	task_t task = convert_port_to_task_inspect_no_eval(port);

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	kotype = ip_kotype(port);

	switch (kotype) {
	case IKOT_TASK_CONTROL:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
		break;
	case IKOT_TASK_READ:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
		break;
	case IKOT_TASK_INSPECT:
		kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
		break;
	default:
		/* The conversion above succeeded, so the port must be one of
		 * the task kobject types; anything else is a kernel bug. */
		panic("strange kobject type");
		break;
	}

	task_deallocate(task);
	return kr;
}
3843
3844 #define TASK_HOLD_NORMAL 0
3845 #define TASK_HOLD_PIDSUSPEND 1
3846 #define TASK_HOLD_LEGACY 2
3847 #define TASK_HOLD_LEGACY_ALL 3
3848
/*
 * place_task_hold:
 *
 * Record one user-level suspension of the given TASK_HOLD_* mode.
 * The first suspension also places the single kernel-level hold and
 * waits for the task's threads to stop running user code.
 *
 * CONDITIONS: called with the task locked (see task_suspend/
 * task_pidsuspend callers).
 */
static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
	    task_pid(task),
	    task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
	    task->user_stop_count, task->user_stop_count + 1);

#if MACH_ASSERT
	current_task()->suspends_outstanding++;
#endif

	/* Legacy (task_suspend) holds are counted separately so that a
	 * no-senders notification can release them all at once. */
	if (mode == TASK_HOLD_LEGACY) {
		task->legacy_stop_count++;
	}

	/* NOTE(review): this guard uses #ifdef while task_release_locked()
	 * uses #if for the same macro — confirm which form is intended. */
#ifdef CONFIG_TASK_SUSPEND_STATS
	_task_mark_suspend_source(task);
#endif /* CONFIG_TASK_SUSPEND_STATS */

	if (task->user_stop_count++ > 0) {
		/*
		 * If the stop count was positive, the task is
		 * already stopped and we can exit.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold).  We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return KERN_SUCCESS;
}
3899
/*
 * release_task_hold:
 *
 * Undo one user-level suspension of the given TASK_HOLD_* mode
 * (or all legacy holds for TASK_HOLD_LEGACY_ALL).  Releases the
 * kernel-level hold when the last user suspension goes away.
 *
 * CONDITIONS: called with the task locked.
 */
static kern_return_t
release_task_hold(
	task_t task,
	int mode)
{
	boolean_t release = FALSE;

	if (!task->active && !task_is_a_corpse(task)) {
		return KERN_FAILURE;
	}

	/* Return success for corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	if (mode == TASK_HOLD_PIDSUSPEND) {
		if (task->pidsuspended == FALSE) {
			return KERN_FAILURE;
		}
		task->pidsuspended = FALSE;
	}

	/* A still-pending pidsuspend reserves one unit of user_stop_count
	 * that must not be released here. */
	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
		    task->user_stop_count, mode, task->legacy_stop_count);

#if MACH_ASSERT
		/*
		 * This is obviously not robust; if we suspend one task and then resume a different one,
		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
		 * or buggy suspender.
		 */
		current_task()->suspends_outstanding--;
#endif

		if (mode == TASK_HOLD_LEGACY_ALL) {
			/* Drop every legacy hold at once (no-senders path). */
			if (task->legacy_stop_count >= task->user_stop_count) {
				task->user_stop_count = 0;
				release = TRUE;
			} else {
				task->user_stop_count -= task->legacy_stop_count;
			}
			task->legacy_stop_count = 0;
		} else {
			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
				task->legacy_stop_count--;
			}
			if (--task->user_stop_count == 0) {
				release = TRUE;
			}
		}
	} else {
		/* Nothing (of this kind) left to release. */
		return KERN_FAILURE;
	}

	/*
	 * Release the task if necessary.
	 */
	if (release) {
		task_release_locked(task);
	}

	return KERN_SUCCESS;
}
3967
3968 boolean_t
get_task_suspended(task_t task)3969 get_task_suspended(task_t task)
3970 {
3971 return 0 != task->user_stop_count;
3972 }
3973
3974 /*
3975 * task_suspend:
3976 *
3977 * Implement an (old-fashioned) user-level suspension on a task.
3978 *
3979 * Because the user isn't expecting to have to manage a suspension
3980 * token, we'll track it for him in the kernel in the form of a naked
3981 * send right to the task's resume port. All such send rights
3982 * account for a single suspension against the task (unlike task_suspend2()
3983 * where each caller gets a unique suspension count represented by a
3984 * unique send-once right).
3985 *
3986 * Conditions:
3987 * The caller holds a reference to the task
3988 */
kern_return_t
task_suspend(
	task_t task)
{
	kern_return_t kr;
	mach_port_t port;
	mach_port_name_t name;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * place a legacy hold on the task.
	 */
	task_lock(task);
	kr = place_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Claim a send right on the task resume port, and request a no-senders
	 * notification on that port (if none outstanding).
	 */
	itk_lock(task);
	port = task->itk_resume;
	if (port == IP_NULL) {
		/* Lazily create the resume port, already carrying a send
		 * right and an armed no-senders request. */
		port = ipc_kobject_alloc_port(task, IKOT_TASK_RESUME,
		    IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
		task->itk_resume = port;
	} else {
		(void)ipc_kobject_make_send_nsrequest(port, task, IKOT_TASK_RESUME);
	}
	itk_unlock(task);

	/*
	 * Copyout the send right into the calling task's IPC space. It won't know it is there,
	 * but we'll look it up when calling a traditional resume. Any IPC operations that
	 * deallocate the send right will auto-release the suspension.
	 */
	if (IP_VALID(port)) {
		kr = ipc_object_copyout(current_space(), ip_to_object(port),
		    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
		    NULL, NULL, &name);
	} else {
		kr = KERN_SUCCESS;
	}
	if (kr != KERN_SUCCESS) {
		/* The hold stays in place even when the token cannot be
		 * copied out; log so the stuck suspension is diagnosable. */
		printf("warning: %s(%d) failed to copyout suspension "
		    "token for pid %d with error: %d\n",
		    proc_name_address(get_bsdtask_info(current_task())),
		    proc_pid(get_bsdtask_info(current_task())),
		    task_pid(task), kr);
	}

	return kr;
}
4049
4050 /*
4051 * task_resume:
4052 * Release a user hold on a task.
4053 *
4054 * Conditions:
4055 * The caller holds a reference to the task
4056 */
kern_return_t
task_resume(
	task_t task)
{
	kern_return_t kr;
	mach_port_name_t resume_port_name;
	ipc_entry_t resume_port_entry;
	ipc_space_t space = current_task()->itk_space;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/* release a legacy task hold */
	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	/* Lock order here: target's itk lock, then the caller's space lock. */
	itk_lock(task); /* for itk_resume */
	is_write_lock(space); /* spin lock */
	if (is_active(space) && IP_VALID(task->itk_resume) &&
	    ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
		/*
		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
		 * we are holding one less legacy hold on the task from this caller. If the release failed,
		 * go ahead and drop all the rights, as someone either already released our holds or the task
		 * is gone.
		 */
		itk_unlock(task);
		if (kr == KERN_SUCCESS) {
			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
		} else {
			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
		}
		/* space unlocked */
	} else {
		itk_unlock(task);
		is_write_unlock(space);
		if (kr == KERN_SUCCESS) {
			/* Resume succeeded but this caller held no token —
			 * some other task must have done the suspend. */
			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
			    proc_name_address(get_bsdtask_info(current_task())), proc_pid(get_bsdtask_info(current_task())),
			    task_pid(task));
		}
	}

	return kr;
}
4104
4105 /*
4106 * Suspend a task that is already protected by a held lock.
4107 * Making/holding a token/reference/port is the caller's responsibility.
4108 */
4109 kern_return_t
task_suspend_internal_locked(task_t task)4110 task_suspend_internal_locked(task_t task)
4111 {
4112 if (task == TASK_NULL || task == kernel_task) {
4113 return KERN_INVALID_ARGUMENT;
4114 }
4115
4116 return place_task_hold(task, TASK_HOLD_NORMAL);
4117 }
4118
4119 /*
4120 * Suspend a task.
4121 * Making/holding a token/reference/port is the caller's responsibility.
4122 */
4123 kern_return_t
task_suspend_internal(task_t task)4124 task_suspend_internal(task_t task)
4125 {
4126 kern_return_t kr;
4127
4128 if (task == TASK_NULL || task == kernel_task) {
4129 return KERN_INVALID_ARGUMENT;
4130 }
4131
4132 task_lock(task);
4133 kr = task_suspend_internal_locked(task);
4134 task_unlock(task);
4135 return kr;
4136 }
4137
4138 /*
4139 * Suspend the target task, and return a suspension token. The token
4140 * represents a reference on the suspended task.
4141 */
4142 static kern_return_t
task_suspend2_grp(task_t task,task_suspension_token_t * suspend_token,task_grp_t grp)4143 task_suspend2_grp(
4144 task_t task,
4145 task_suspension_token_t *suspend_token,
4146 task_grp_t grp)
4147 {
4148 kern_return_t kr;
4149
4150 kr = task_suspend_internal(task);
4151 if (kr != KERN_SUCCESS) {
4152 *suspend_token = TASK_NULL;
4153 return kr;
4154 }
4155
4156 /*
4157 * Take a reference on the target task and return that to the caller
4158 * as a "suspension token," which can be converted into an SO right to
4159 * the now-suspended task's resume port.
4160 */
4161 task_reference_grp(task, grp);
4162 *suspend_token = task;
4163
4164 return KERN_SUCCESS;
4165 }
4166
4167 kern_return_t
task_suspend2_mig(task_t task,task_suspension_token_t * suspend_token)4168 task_suspend2_mig(
4169 task_t task,
4170 task_suspension_token_t *suspend_token)
4171 {
4172 return task_suspend2_grp(task, suspend_token, TASK_GRP_MIG);
4173 }
4174
4175 kern_return_t
task_suspend2_external(task_t task,task_suspension_token_t * suspend_token)4176 task_suspend2_external(
4177 task_t task,
4178 task_suspension_token_t *suspend_token)
4179 {
4180 return task_suspend2_grp(task, suspend_token, TASK_GRP_EXTERNAL);
4181 }
4182
4183 /*
4184 * Resume a task that is already protected by a held lock.
4185 * (reference/token/port management is caller's responsibility).
4186 */
4187 kern_return_t
task_resume_internal_locked(task_suspension_token_t task)4188 task_resume_internal_locked(
4189 task_suspension_token_t task)
4190 {
4191 if (task == TASK_NULL || task == kernel_task) {
4192 return KERN_INVALID_ARGUMENT;
4193 }
4194
4195 return release_task_hold(task, TASK_HOLD_NORMAL);
4196 }
4197
4198 /*
4199 * Resume a task.
4200 * (reference/token/port management is caller's responsibility).
4201 */
4202 kern_return_t
task_resume_internal(task_suspension_token_t task)4203 task_resume_internal(
4204 task_suspension_token_t task)
4205 {
4206 kern_return_t kr;
4207
4208 if (task == TASK_NULL || task == kernel_task) {
4209 return KERN_INVALID_ARGUMENT;
4210 }
4211
4212 task_lock(task);
4213 kr = task_resume_internal_locked(task);
4214 task_unlock(task);
4215 return kr;
4216 }
4217
4218 /*
4219 * Resume the task using a suspension token. Consumes the token's ref.
4220 */
4221 static kern_return_t
task_resume2_grp(task_suspension_token_t task,task_grp_t grp)4222 task_resume2_grp(
4223 task_suspension_token_t task,
4224 task_grp_t grp)
4225 {
4226 kern_return_t kr;
4227
4228 kr = task_resume_internal(task);
4229 task_suspension_token_deallocate_grp(task, grp);
4230
4231 return kr;
4232 }
4233
4234 kern_return_t
task_resume2_mig(task_suspension_token_t task)4235 task_resume2_mig(
4236 task_suspension_token_t task)
4237 {
4238 return task_resume2_grp(task, TASK_GRP_MIG);
4239 }
4240
4241 kern_return_t
task_resume2_external(task_suspension_token_t task)4242 task_resume2_external(
4243 task_suspension_token_t task)
4244 {
4245 return task_resume2_grp(task, TASK_GRP_EXTERNAL);
4246 }
4247
/*
 * task_suspension_no_senders:
 *
 * No-senders notification for a task's resume port: all outstanding
 * legacy suspension tokens (naked send rights) have been dropped.
 */
static void
task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	task_t task = convert_port_to_task_suspension_token(port);
	kern_return_t kr;

	if (task == TASK_NULL) {
		return;
	}

	if (task == kernel_task) {
		task_suspension_token_deallocate(task);
		return;
	}

	task_lock(task);

	/* Presumably re-arms the no-senders request; KERN_FAILURE appears
	 * to mean no new senders raced in, so the notification is final —
	 * TODO confirm against ipc_kobject_nsrequest(). */
	kr = ipc_kobject_nsrequest(port, mscount, NULL);
	if (kr == KERN_FAILURE) {
		/* release all the [remaining] outstanding legacy holds */
		release_task_hold(task, TASK_HOLD_LEGACY_ALL);
	}

	task_unlock(task);

	task_suspension_token_deallocate(task); /* drop token reference */
}
4275
4276 /*
4277 * Fires when a send once made
4278 * by convert_task_suspension_token_to_port() dies.
4279 */
void
task_suspension_send_once(ipc_port_t port)
{
	task_t task = convert_port_to_task_suspension_token(port);

	if (task == TASK_NULL || task == kernel_task) {
		return; /* nothing to do */
	}

	/* release the hold held by this specific send-once right */
	task_lock(task);
	/* Return value ignored: the hold may already have been released,
	 * e.g. via a TASK_HOLD_LEGACY_ALL no-senders sweep. */
	release_task_hold(task, TASK_HOLD_NORMAL);
	task_unlock(task);

	task_suspension_token_deallocate(task); /* drop token reference */
}
4296
4297 static kern_return_t
task_pidsuspend_locked(task_t task)4298 task_pidsuspend_locked(task_t task)
4299 {
4300 kern_return_t kr;
4301
4302 if (task->pidsuspended) {
4303 kr = KERN_FAILURE;
4304 goto out;
4305 }
4306
4307 task->pidsuspended = TRUE;
4308
4309 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4310 if (kr != KERN_SUCCESS) {
4311 task->pidsuspended = FALSE;
4312 }
4313 out:
4314 return kr;
4315 }
4316
4317
4318 /*
4319 * task_pidsuspend:
4320 *
4321 * Suspends a task by placing a hold on its threads.
4322 *
4323 * Conditions:
4324 * The caller holds a reference to the task
4325 */
kern_return_t
task_pidsuspend(
	task_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	kr = task_pidsuspend_locked(task);

	task_unlock(task);

	/* NOTE(review): message_app_suspended is read after the task lock
	 * is dropped — assumed stable enough for this notification. */
	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		/* Tell IOKit the app-suspended state changed. */
		iokit_task_app_suspended_changed(task);
	}

	return kr;
}
4348
4349 /*
4350 * task_pidresume:
4351 * Resumes a previously suspended task.
4352 *
4353 * Conditions:
4354 * The caller holds a reference to the task
4355 */
kern_return_t
task_pidresume(
	task_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

#if CONFIG_FREEZE

	/* Serialize with any in-flight freeze/thaw state transition. */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	task->changing_freeze_state = TRUE;
#endif

	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);

	task_unlock(task);

	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
		/* Tell IOKit the app-suspended state changed. */
		iokit_task_app_suspended_changed(task);
	}

#if CONFIG_FREEZE

	task_lock(task);

	/* A successful pidresume also thaws the task. */
	if (kr == KERN_SUCCESS) {
		task->frozen = FALSE;
	}
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);
#endif

	return kr;
}
4403
4404 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4405
4406 /*
4407 * task_add_turnstile_watchports:
4408 * Setup watchports to boost the main thread of the task.
4409 *
4410 * Arguments:
4411 * task: task being spawned
4412 * thread: main thread of task
4413 * portwatch_ports: array of watchports
4414 * portwatch_count: number of watchports
4415 *
4416 * Conditions:
4417 * Nothing locked.
4418 */
void
task_add_turnstile_watchports(
	task_t          task,
	thread_t        thread,
	ipc_port_t      *portwatch_ports,
	uint32_t        portwatch_count)
{
	struct task_watchports *watchports = NULL;
	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
	os_ref_count_t refs;

	/* Check if the task has terminated */
	if (!task->active) {
		return;
	}

	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);

	/* Allocated with one initial reference (see os_ref_init there). */
	watchports = task_watchports_alloc_init(task, thread, portwatch_count);

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Setup watchports to boost the main thread */
	refs = task_add_turnstile_watchports_locked(task,
	    watchports, previous_elem_array, portwatch_ports,
	    portwatch_count);

	/* Drop the space lock */
	is_write_unlock(task->itk_space);

	/* No port took a reference: free the struct. */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}

	/* Drop the ref on previous_elem_array */
	for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
		task_watchport_elem_deallocate(previous_elem_array[i]);
	}
}
4459
4460 /*
4461 * task_remove_turnstile_watchports:
4462 * Clear all turnstile boost on the task from watchports.
4463 *
4464 * Arguments:
4465 * task: task being terminated
4466 *
4467 * Conditions:
4468 * Nothing locked.
4469 */
void
task_remove_turnstile_watchports(
	task_t          task)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	struct task_watchports *watchports = NULL;
	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
	uint32_t portwatch_count;

	/* Lock the ipc space */
	is_write_lock(task->itk_space);

	/* Check if watchport boost exist */
	if (task->watchports == NULL) {
		is_write_unlock(task->itk_space);
		return;
	}
	watchports = task->watchports;
	portwatch_count = watchports->tw_elem_array_count;

	/* Detach each element; ports with refs come back in port_freelist. */
	refs = task_remove_turnstile_watchports_locked(task, watchports,
	    port_freelist);

	is_write_unlock(task->itk_space);

	/* Drop all the port references */
	for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
		ip_release(port_freelist[i]);
	}

	/* Clear the task and thread references for task_watchport */
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4505
4506 /*
4507 * task_transfer_turnstile_watchports:
4508 * Transfer all watchport turnstile boost from old task to new task.
4509 *
4510 * Arguments:
4511 * old_task: task calling exec
4512 * new_task: new exec'ed task
4513 * thread: main thread of new task
4514 *
4515 * Conditions:
4516 * Nothing locked.
4517 */
void
task_transfer_turnstile_watchports(
	task_t          old_task,
	task_t          new_task,
	thread_t        new_thread)
{
	struct task_watchports *old_watchports = NULL;
	struct task_watchports *new_watchports = NULL;
	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
	uint32_t portwatch_count;

	if (old_task->watchports == NULL || !new_task->active) {
		return;
	}

	/* Get the watch port count from the old task */
	is_write_lock(old_task->itk_space);
	if (old_task->watchports == NULL) {
		is_write_unlock(old_task->itk_space);
		return;
	}

	portwatch_count = old_task->watchports->tw_elem_array_count;
	is_write_unlock(old_task->itk_space);

	/* Allocate without locks held; the count is re-validated below. */
	new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);

	/* Lock the ipc space for old task */
	is_write_lock(old_task->itk_space);

	/* Lock the ipc space for new task */
	is_write_lock(new_task->itk_space);

	/* Check if watchport boost exist (state may have changed while unlocked) */
	if (old_task->watchports == NULL || !new_task->active) {
		is_write_unlock(new_task->itk_space);
		is_write_unlock(old_task->itk_space);
		(void)task_watchports_release(new_watchports);
		task_watchports_deallocate(new_watchports);
		return;
	}

	old_watchports = old_task->watchports;
	assert(portwatch_count == old_task->watchports->tw_elem_array_count);

	/* Setup new task watchports */
	new_task->watchports = new_watchports;

	for (uint32_t i = 0; i < portwatch_count; i++) {
		ipc_port_t port = old_watchports->tw_elem[i].twe_port;

		if (port == NULL) {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);

		task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);

		/* Swap old element for new only if the port still carries it. */
		if (ipc_port_replace_watchport_elem_conditional_locked(port,
		    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&old_watchports->tw_elem[i]);

			task_watchports_retain(new_watchports);
			old_refs = task_watchports_release(old_watchports);

			/* Check if all ports are cleaned */
			if (old_refs == 0) {
				old_task->watchports = NULL;
			}
		} else {
			task_watchport_elem_clear(&new_watchports->tw_elem[i]);
		}
		/* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
	}

	/* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
	new_refs = task_watchports_release(new_watchports);
	if (new_refs == 0) {
		new_task->watchports = NULL;
	}

	is_write_unlock(new_task->itk_space);
	is_write_unlock(old_task->itk_space);

	/* Clear the task and thread references for old_watchport */
	if (old_refs == 0) {
		task_watchports_deallocate(old_watchports);
	}

	/* Clear the task and thread references for new_watchport */
	if (new_refs == 0) {
		task_watchports_deallocate(new_watchports);
	}
}
4616
4617 /*
4618 * task_add_turnstile_watchports_locked:
4619 * Setup watchports to boost the main thread of the task.
4620 *
4621 * Arguments:
4622 * task: task to boost
4623 * watchports: watchport structure to be attached to the task
4624 * previous_elem_array: an array of old watchport_elem to be returned to caller
4625 * portwatch_ports: array of watchports
4626 * portwatch_count: number of watchports
4627 *
4628 * Conditions:
4629 * ipc space of the task locked.
4630 * returns array of old watchport_elem in previous_elem_array
4631 */
static os_ref_count_t
task_add_turnstile_watchports_locked(
	task_t                  task,
	struct task_watchports  *watchports,
	struct task_watchport_elem **previous_elem_array,
	ipc_port_t              *portwatch_ports,
	uint32_t                portwatch_count)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* Check if the task is still active */
	if (!task->active) {
		refs = task_watchports_release(watchports);
		return refs;
	}

	assert(task->watchports == NULL);
	task->watchports = watchports;

	/* i walks the ports; j only advances when a previous element was
	 * handed back, so previous_elem_array stays densely packed. */
	for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
		ipc_port_t port = portwatch_ports[i];

		task_watchport_elem_init(&watchports->tw_elem[i], task, port);
		if (port == NULL) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}

		ip_mq_lock(port);

		/* Check if port is in valid state to be setup as watchport */
		if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
		    &previous_elem_array[j]) != KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			continue;
		}
		/* port unlocked on return */

		/* Each successfully attached port holds a port ref and a
		 * reference on the watchports struct. */
		ip_reference(port);
		task_watchports_retain(watchports);
		if (previous_elem_array[j] != NULL) {
			j++;
		}
	}

	/* Drop the reference on task_watchport struct returned by os_ref_init */
	refs = task_watchports_release(watchports);
	if (refs == 0) {
		task->watchports = NULL;
	}

	return refs;
}
4685
4686 /*
4687 * task_remove_turnstile_watchports_locked:
4688 * Clear all turnstile boost on the task from watchports.
4689 *
4690 * Arguments:
4691 * task: task to remove watchports from
4692 * watchports: watchports structure for the task
4693 * port_freelist: array of ports returned with ref to caller
4694 *
4695 *
4696 * Conditions:
4697 * ipc space of the task locked.
4698 * array of ports with refs are returned in port_freelist
4699 */
static os_ref_count_t
task_remove_turnstile_watchports_locked(
	task_t                  task,
	struct task_watchports  *watchports,
	ipc_port_t              *port_freelist)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

	/* j packs detached ports densely into port_freelist. */
	for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
		ipc_port_t port = watchports->tw_elem[i].twe_port;
		if (port == NULL) {
			continue;
		}

		/* Lock the port and check if it has the entry */
		ip_mq_lock(port);
		if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
		    &watchports->tw_elem[i]) == KERN_SUCCESS) {
			task_watchport_elem_clear(&watchports->tw_elem[i]);
			/* Port ref (taken at attach time) is handed to the caller. */
			port_freelist[j++] = port;
			refs = task_watchports_release(watchports);

			/* Check if all ports are cleaned */
			if (refs == 0) {
				task->watchports = NULL;
				break;
			}
		}
		/* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
	}
	return refs;
}
4732
4733 /*
4734 * task_watchports_alloc_init:
4735 * Allocate and initialize task watchport struct.
4736 *
4737 * Conditions:
4738 * Nothing locked.
4739 */
4740 static struct task_watchports *
task_watchports_alloc_init(task_t task,thread_t thread,uint32_t count)4741 task_watchports_alloc_init(
4742 task_t task,
4743 thread_t thread,
4744 uint32_t count)
4745 {
4746 struct task_watchports *watchports = kalloc_type(struct task_watchports,
4747 struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4748
4749 task_reference(task);
4750 thread_reference(thread);
4751 watchports->tw_task = task;
4752 watchports->tw_thread = thread;
4753 watchports->tw_elem_array_count = count;
4754 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4755
4756 return watchports;
4757 }
4758
4759 /*
4760 * task_watchports_deallocate:
4761 * Deallocate task watchport struct.
4762 *
4763 * Conditions:
4764 * Nothing locked.
4765 */
4766 static void
task_watchports_deallocate(struct task_watchports * watchports)4767 task_watchports_deallocate(
4768 struct task_watchports *watchports)
4769 {
4770 uint32_t portwatch_count = watchports->tw_elem_array_count;
4771
4772 task_deallocate(watchports->tw_task);
4773 thread_deallocate(watchports->tw_thread);
4774 kfree_type(struct task_watchports, struct task_watchport_elem,
4775 portwatch_count, watchports);
4776 }
4777
4778 /*
4779 * task_watchport_elem_deallocate:
4780 * Deallocate task watchport element and release its ref on task_watchport.
4781 *
4782 * Conditions:
4783 * Nothing locked.
4784 */
void
task_watchport_elem_deallocate(
	struct task_watchport_elem    *watchport_elem)
{
	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
	task_t task = watchport_elem->twe_task;
	struct task_watchports *watchports = NULL;
	ipc_port_t port = NULL;

	assert(task != NULL);

	/* Take the space lock to modify the element */
	is_write_lock(task->itk_space);

	watchports = task->watchports;
	assert(watchports != NULL);

	/* Save the port so its reference can be dropped after unlocking. */
	port = watchport_elem->twe_port;
	assert(port != NULL);

	task_watchport_elem_clear(watchport_elem);
	refs = task_watchports_release(watchports);

	if (refs == 0) {
		/* Last element gone: detach the struct from the task. */
		task->watchports = NULL;
	}

	is_write_unlock(task->itk_space);

	/* Release the port ref and, if we held the last watchports ref,
	 * free the struct — both outside the space lock. */
	ip_release(port);
	if (refs == 0) {
		task_watchports_deallocate(watchports);
	}
}
4819
4820 /*
4821 * task_has_watchports:
4822 * Return TRUE if task has watchport boosts.
4823 *
4824 * Conditions:
4825 * Nothing locked.
4826 */
4827 boolean_t
task_has_watchports(task_t task)4828 task_has_watchports(task_t task)
4829 {
4830 return task->watchports != NULL;
4831 }
4832
4833 #if DEVELOPMENT || DEBUG
4834
4835 extern void IOSleep(int);
4836
kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int n;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * this function is used to strip all of the mappings from
	 * the pmap for the specified task to force the task to
	 * re-fault all of the pages it is actively using... this
	 * allows us to approximate the true working set of the
	 * specified task. We only engage if at least 1 of the
	 * threads in the task is runnable, but we want to continuously
	 * sweep (at least for a while - I've arbitrarily set the limit at
	 * 100 sweeps to be re-looked at as we gain experience) to get a better
	 * view into what areas within a page are being visited (as opposed to only
	 * seeing the first fault of a page after the task becomes
	 * runnable)... in the future I may
	 * try to block until awakened by a thread in this task
	 * being made runnable, but for now we'll periodically poll from the
	 * user level debug tool driving the sysctl
	 */
	for (n = 0; n < 100; n++) {
		thread_t thread;
		boolean_t runnable;
		boolean_t do_unnest;
		int page_count;

		runnable = FALSE;
		do_unnest = FALSE;

		task_lock(task);

		/* One runnable thread is enough to keep sweeping. */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}
		/* Count each invocation of this routine once, on the first sweep. */
		if (n == 0) {
			task->task_disconnected_count++;
		}

		/* Request unnesting at most once per task, on the first sweep
		 * that finds a runnable thread. */
		if (task->task_unnested == FALSE) {
			if (runnable == TRUE) {
				task->task_unnested = TRUE;
				do_unnest = TRUE;
			}
		}
		task_unlock(task);

		/* Stop sweeping as soon as no thread is runnable. */
		if (runnable == FALSE) {
			break;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
		    task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
		    task, page_count, 0, 0, 0);

		/* Yield briefly every 5th sweep. */
		if ((n % 5) == 4) {
			IOSleep(1);
		}
	}
	return KERN_SUCCESS;
}
4909
4910 #endif
4911
4912
4913 #if CONFIG_FREEZE
4914
4915 /*
4916 * task_freeze:
4917 *
4918 * Freeze a task.
4919 *
4920 * Conditions:
4921 * The caller holds a reference to the task
4922 */
4923 extern void vm_wake_compactor_swapper(void);
4924 extern struct freezer_context freezer_context_global;
4925
kern_return_t
task_freeze(
	task_t    task,
	uint32_t  *purgeable_count,
	uint32_t  *wired_count,
	uint32_t  *clean_count,
	uint32_t  *dirty_count,
	uint32_t  dirty_budget,
	uint32_t  *shared_count,
	int       *freezer_error_code,
	boolean_t eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/*
	 * Only one freeze/thaw state transition may be in flight at a
	 * time: wait until any concurrent one completes.
	 */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (task->frozen) {
		/* Already frozen: nothing to do. */
		task_unlock(task);
		return KERN_FAILURE;
	}
	/* Claim the transition so other freezers/thawers wait on us. */
	task->changing_freeze_state = TRUE;

	/* Publish which task the global freezer context is working on. */
	freezer_context_global.freezer_ctx_task = task;

	/* Drop the task lock across the potentially long VM operation. */
	task_unlock(task);

	kr = vm_map_freeze(task,
	    purgeable_count,
	    wired_count,
	    clean_count,
	    dirty_count,
	    dirty_budget,
	    shared_count,
	    freezer_error_code,
	    eval_only);

	task_lock(task);

	/* Only an actual (non-evaluation) successful freeze marks the task frozen. */
	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;

		freezer_context_global.freezer_ctx_task = NULL;
		freezer_context_global.freezer_ctx_uncompressed_pages = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64); /*used to track pageouts*/
		}

		freezer_context_global.freezer_ctx_swapped_bytes = 0;
	}

	/* Release the transition claim and wake any waiters. */
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (kr == KERN_SUCCESS) &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kind of "per-task packed c_segs"
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&vm_swapout_thread);
	}

	return kr;
}
5014
5015 /*
5016 * task_thaw:
5017 *
5018 * Thaw a currently frozen task.
5019 *
5020 * Conditions:
5021 * The caller holds a reference to the task
5022 */
kern_return_t
task_thaw(
	task_t          task)
{
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/* Wait for any in-flight freeze/thaw state transition to finish. */
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (!task->frozen) {
		/* Nothing to thaw. */
		task_unlock(task);
		return KERN_FAILURE;
	}
	task->frozen = FALSE;

	task_unlock(task);

	return KERN_SUCCESS;
}
5050
5051 void
task_update_frozen_to_swap_acct(task_t task,int64_t amount,freezer_acct_op_t op)5052 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
5053 {
5054 /*
5055 * We don't assert that the task lock is held because we call this
5056 * routine from the decompression path and we won't be holding the
5057 * task lock. However, since we are in the context of the task we are
5058 * safe.
5059 * In the case of the task_freeze path, we call it from behind the task
5060 * lock but we don't need to because we have a reference on the proc
5061 * being frozen.
5062 */
5063
5064 assert(task);
5065 if (amount == 0) {
5066 return;
5067 }
5068
5069 if (op == CREDIT_TO_SWAP) {
5070 ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5071 } else if (op == DEBIT_FROM_SWAP) {
5072 ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5073 } else {
5074 panic("task_update_frozen_to_swap_acct: Invalid ledger op");
5075 }
5076 }
5077 #endif /* CONFIG_FREEZE */
5078
kern_return_t
task_set_security_tokens(
	task_t           task,
	security_token_t sec_token,
	audit_token_t    audit_token,
	host_priv_t      host_priv)
{
	ipc_port_t host_port = IP_NULL;
	kern_return_t kr;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Install the new security and audit tokens under the task lock. */
	task_lock(task);
	task_set_tokens(task, &sec_token, &audit_token);
	task_unlock(task);

	/*
	 * Pick the host port to hand the task: the privileged host port
	 * when the caller supplied host_priv, the plain host port otherwise.
	 */
	if (host_priv != HOST_PRIV_NULL) {
		kr = host_get_host_priv_port(host_priv, &host_port);
	} else {
		kr = host_get_host_port(host_priv_self(), &host_port);
	}
	assert(kr == KERN_SUCCESS);

	kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
	return kr;
}
5107
/* Stub: this interface is not implemented and always rejects the request. */
kern_return_t
task_send_trace_memory(
	__unused task_t   target_task,
	__unused uint32_t pid,
	__unused uint64_t uniqueid)
{
	return KERN_INVALID_ARGUMENT;
}
5116
5117 /*
5118 * This routine was added, pretty much exclusively, for registering the
5119 * RPC glue vector for in-kernel short circuited tasks. Rather than
5120 * removing it completely, I have only disabled that feature (which was
5121 * the only feature at the time). It just appears that we are going to
5122 * want to add some user data to tasks in the future (i.e. bsd info,
5123 * task names, etc...), so I left it in the formal task interface.
5124 */
kern_return_t
task_set_info(
	task_t          task,
	task_flavor_t   flavor,
	__unused task_info_t    task_info_in, /* pointer to IN array */
	__unused mach_msg_type_number_t task_info_count)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	switch (flavor) {
#if CONFIG_ATM
	case TASK_TRACE_MEMORY_INFO:
		/* ATM trace-memory registration is no longer supported. */
		return KERN_NOT_SUPPORTED;
#endif // CONFIG_ATM
	default:
		/* No settable flavors remain. */
		return KERN_INVALID_ARGUMENT;
	}
}
5144
5145 static void
_task_fill_times(task_t task,time_value_t * user_time,time_value_t * sys_time)5146 _task_fill_times(task_t task, time_value_t *user_time, time_value_t *sys_time)
5147 {
5148 clock_sec_t sec;
5149 clock_usec_t usec;
5150
5151 struct recount_times_mach times = recount_task_terminated_times(task);
5152 absolutetime_to_microtime(times.rtm_user, &sec, &usec);
5153 user_time->seconds = (typeof(user_time->seconds))sec;
5154 user_time->microseconds = usec;
5155 absolutetime_to_microtime(times.rtm_system, &sec, &usec);
5156 sys_time->seconds = (typeof(sys_time->seconds))sec;
5157 sys_time->microseconds = usec;
5158 }
5159
5160 int radar_20146450 = 1;
5161 kern_return_t
task_info(task_t task,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)5162 task_info(
5163 task_t task,
5164 task_flavor_t flavor,
5165 task_info_t task_info_out,
5166 mach_msg_type_number_t *task_info_count)
5167 {
5168 kern_return_t error = KERN_SUCCESS;
5169 mach_msg_type_number_t original_task_info_count;
5170 bool is_kernel_task = (task == kernel_task);
5171
5172 if (task == TASK_NULL) {
5173 return KERN_INVALID_ARGUMENT;
5174 }
5175
5176 original_task_info_count = *task_info_count;
5177 task_lock(task);
5178
5179 if (task != current_task() && !task->active) {
5180 task_unlock(task);
5181 return KERN_INVALID_ARGUMENT;
5182 }
5183
5184
5185 switch (flavor) {
5186 case TASK_BASIC_INFO_32:
5187 case TASK_BASIC2_INFO_32:
5188 #if defined(__arm64__)
5189 case TASK_BASIC_INFO_64:
5190 #endif
5191 {
5192 task_basic_info_32_t basic_info;
5193 ledger_amount_t tmp;
5194
5195 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
5196 error = KERN_INVALID_ARGUMENT;
5197 break;
5198 }
5199
5200 basic_info = (task_basic_info_32_t)task_info_out;
5201
5202 basic_info->virtual_size = (typeof(basic_info->virtual_size))
5203 vm_map_adjusted_size(is_kernel_task ? kernel_map : task->map);
5204 if (flavor == TASK_BASIC2_INFO_32) {
5205 /*
5206 * The "BASIC2" flavor gets the maximum resident
5207 * size instead of the current resident size...
5208 */
5209 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
5210 } else {
5211 ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
5212 }
5213 basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
5214
5215 _task_fill_times(task, &basic_info->user_time,
5216 &basic_info->system_time);
5217
5218 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5219 basic_info->suspend_count = task->user_stop_count;
5220
5221 *task_info_count = TASK_BASIC_INFO_32_COUNT;
5222 break;
5223 }
5224
5225 #if defined(__arm64__)
5226 case TASK_BASIC_INFO_64_2:
5227 {
5228 task_basic_info_64_2_t basic_info;
5229
5230 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
5231 error = KERN_INVALID_ARGUMENT;
5232 break;
5233 }
5234
5235 basic_info = (task_basic_info_64_2_t)task_info_out;
5236
5237 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5238 kernel_map : task->map);
5239 ledger_get_balance(task->ledger, task_ledgers.phys_mem,
5240 (ledger_amount_t *)&basic_info->resident_size);
5241 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5242 basic_info->suspend_count = task->user_stop_count;
5243 _task_fill_times(task, &basic_info->user_time,
5244 &basic_info->system_time);
5245
5246 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
5247 break;
5248 }
5249
5250 #else /* defined(__arm64__) */
5251 case TASK_BASIC_INFO_64:
5252 {
5253 task_basic_info_64_t basic_info;
5254
5255 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
5256 error = KERN_INVALID_ARGUMENT;
5257 break;
5258 }
5259
5260 basic_info = (task_basic_info_64_t)task_info_out;
5261
5262 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5263 kernel_map : task->map);
5264 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
5265 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5266 basic_info->suspend_count = task->user_stop_count;
5267 _task_fill_times(task, &basic_info->user_time,
5268 &basic_info->system_time);
5269
5270 *task_info_count = TASK_BASIC_INFO_64_COUNT;
5271 break;
5272 }
5273 #endif /* defined(__arm64__) */
5274
5275 case MACH_TASK_BASIC_INFO:
5276 {
5277 mach_task_basic_info_t basic_info;
5278
5279 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5280 error = KERN_INVALID_ARGUMENT;
5281 break;
5282 }
5283
5284 basic_info = (mach_task_basic_info_t)task_info_out;
5285
5286 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5287 kernel_map : task->map);
5288 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
5289 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
5290 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5291 basic_info->suspend_count = task->user_stop_count;
5292 _task_fill_times(task, &basic_info->user_time,
5293 &basic_info->system_time);
5294
5295 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5296 break;
5297 }
5298
5299 case TASK_THREAD_TIMES_INFO:
5300 {
5301 task_thread_times_info_t times_info;
5302 thread_t thread;
5303
5304 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5305 error = KERN_INVALID_ARGUMENT;
5306 break;
5307 }
5308
5309 times_info = (task_thread_times_info_t)task_info_out;
5310 times_info->user_time = (time_value_t){ 0 };
5311 times_info->system_time = (time_value_t){ 0 };
5312
5313 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5314 if ((thread->options & TH_OPT_IDLE_THREAD) == 0) {
5315 time_value_t user_time, system_time;
5316
5317 thread_read_times(thread, &user_time, &system_time, NULL);
5318 time_value_add(×_info->user_time, &user_time);
5319 time_value_add(×_info->system_time, &system_time);
5320 }
5321 }
5322
5323 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5324 break;
5325 }
5326
5327 case TASK_ABSOLUTETIME_INFO:
5328 {
5329 task_absolutetime_info_t info;
5330
5331 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5332 error = KERN_INVALID_ARGUMENT;
5333 break;
5334 }
5335
5336 info = (task_absolutetime_info_t)task_info_out;
5337
5338 struct recount_times_mach term_times =
5339 recount_task_terminated_times(task);
5340 struct recount_times_mach total_times = recount_task_times(task);
5341
5342 info->total_user = total_times.rtm_user;
5343 info->total_system = total_times.rtm_system;
5344 info->threads_user = total_times.rtm_user - term_times.rtm_user;
5345 info->threads_system += total_times.rtm_system - term_times.rtm_system;
5346
5347 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5348 break;
5349 }
5350
5351 case TASK_DYLD_INFO:
5352 {
5353 task_dyld_info_t info;
5354
5355 /*
5356 * We added the format field to TASK_DYLD_INFO output. For
5357 * temporary backward compatibility, accept the fact that
5358 * clients may ask for the old version - distinquished by the
5359 * size of the expected result structure.
5360 */
5361 #define TASK_LEGACY_DYLD_INFO_COUNT \
5362 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5363
5364 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5365 error = KERN_INVALID_ARGUMENT;
5366 break;
5367 }
5368
5369 info = (task_dyld_info_t)task_info_out;
5370 info->all_image_info_addr = task->all_image_info_addr;
5371 info->all_image_info_size = task->all_image_info_size;
5372
5373 /* only set format on output for those expecting it */
5374 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5375 info->all_image_info_format = task_has_64Bit_addr(task) ?
5376 TASK_DYLD_ALL_IMAGE_INFO_64 :
5377 TASK_DYLD_ALL_IMAGE_INFO_32;
5378 *task_info_count = TASK_DYLD_INFO_COUNT;
5379 } else {
5380 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5381 }
5382 break;
5383 }
5384
5385 case TASK_EXTMOD_INFO:
5386 {
5387 task_extmod_info_t info;
5388 void *p;
5389
5390 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5391 error = KERN_INVALID_ARGUMENT;
5392 break;
5393 }
5394
5395 info = (task_extmod_info_t)task_info_out;
5396
5397 p = get_bsdtask_info(task);
5398 if (p) {
5399 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5400 } else {
5401 bzero(info->task_uuid, sizeof(info->task_uuid));
5402 }
5403 info->extmod_statistics = task->extmod_statistics;
5404 *task_info_count = TASK_EXTMOD_INFO_COUNT;
5405
5406 break;
5407 }
5408
5409 case TASK_KERNELMEMORY_INFO:
5410 {
5411 task_kernelmemory_info_t tkm_info;
5412 ledger_amount_t credit, debit;
5413
5414 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5415 error = KERN_INVALID_ARGUMENT;
5416 break;
5417 }
5418
5419 tkm_info = (task_kernelmemory_info_t) task_info_out;
5420 tkm_info->total_palloc = 0;
5421 tkm_info->total_pfree = 0;
5422 tkm_info->total_salloc = 0;
5423 tkm_info->total_sfree = 0;
5424
5425 if (task == kernel_task) {
5426 /*
5427 * All shared allocs/frees from other tasks count against
5428 * the kernel private memory usage. If we are looking up
5429 * info for the kernel task, gather from everywhere.
5430 */
5431 task_unlock(task);
5432
5433 /* start by accounting for all the terminated tasks against the kernel */
5434 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5435 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5436
5437 /* count all other task/thread shared alloc/free against the kernel */
5438 lck_mtx_lock(&tasks_threads_lock);
5439
5440 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5441 queue_iterate(&tasks, task, task_t, tasks) {
5442 if (task == kernel_task) {
5443 if (ledger_get_entries(task->ledger,
5444 task_ledgers.tkm_private, &credit,
5445 &debit) == KERN_SUCCESS) {
5446 tkm_info->total_palloc += credit;
5447 tkm_info->total_pfree += debit;
5448 }
5449 }
5450 if (!ledger_get_entries(task->ledger,
5451 task_ledgers.tkm_shared, &credit, &debit)) {
5452 tkm_info->total_palloc += credit;
5453 tkm_info->total_pfree += debit;
5454 }
5455 }
5456 lck_mtx_unlock(&tasks_threads_lock);
5457 } else {
5458 if (!ledger_get_entries(task->ledger,
5459 task_ledgers.tkm_private, &credit, &debit)) {
5460 tkm_info->total_palloc = credit;
5461 tkm_info->total_pfree = debit;
5462 }
5463 if (!ledger_get_entries(task->ledger,
5464 task_ledgers.tkm_shared, &credit, &debit)) {
5465 tkm_info->total_salloc = credit;
5466 tkm_info->total_sfree = debit;
5467 }
5468 task_unlock(task);
5469 }
5470
5471 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5472 return KERN_SUCCESS;
5473 }
5474
5475 /* OBSOLETE */
5476 case TASK_SCHED_FIFO_INFO:
5477 {
5478 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5479 error = KERN_INVALID_ARGUMENT;
5480 break;
5481 }
5482
5483 error = KERN_INVALID_POLICY;
5484 break;
5485 }
5486
5487 /* OBSOLETE */
5488 case TASK_SCHED_RR_INFO:
5489 {
5490 policy_rr_base_t rr_base;
5491 uint32_t quantum_time;
5492 uint64_t quantum_ns;
5493
5494 if (*task_info_count < POLICY_RR_BASE_COUNT) {
5495 error = KERN_INVALID_ARGUMENT;
5496 break;
5497 }
5498
5499 rr_base = (policy_rr_base_t) task_info_out;
5500
5501 if (task != kernel_task) {
5502 error = KERN_INVALID_POLICY;
5503 break;
5504 }
5505
5506 rr_base->base_priority = task->priority;
5507
5508 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5509 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5510
5511 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5512
5513 *task_info_count = POLICY_RR_BASE_COUNT;
5514 break;
5515 }
5516
5517 /* OBSOLETE */
5518 case TASK_SCHED_TIMESHARE_INFO:
5519 {
5520 policy_timeshare_base_t ts_base;
5521
5522 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5523 error = KERN_INVALID_ARGUMENT;
5524 break;
5525 }
5526
5527 ts_base = (policy_timeshare_base_t) task_info_out;
5528
5529 if (task == kernel_task) {
5530 error = KERN_INVALID_POLICY;
5531 break;
5532 }
5533
5534 ts_base->base_priority = task->priority;
5535
5536 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5537 break;
5538 }
5539
5540 case TASK_SECURITY_TOKEN:
5541 {
5542 security_token_t *sec_token_p;
5543
5544 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5545 error = KERN_INVALID_ARGUMENT;
5546 break;
5547 }
5548
5549 sec_token_p = (security_token_t *) task_info_out;
5550
5551 *sec_token_p = *task_get_sec_token(task);
5552
5553 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
5554 break;
5555 }
5556
5557 case TASK_AUDIT_TOKEN:
5558 {
5559 audit_token_t *audit_token_p;
5560
5561 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5562 error = KERN_INVALID_ARGUMENT;
5563 break;
5564 }
5565
5566 audit_token_p = (audit_token_t *) task_info_out;
5567
5568 *audit_token_p = *task_get_audit_token(task);
5569
5570 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
5571 break;
5572 }
5573
5574 case TASK_SCHED_INFO:
5575 error = KERN_INVALID_ARGUMENT;
5576 break;
5577
5578 case TASK_EVENTS_INFO:
5579 {
5580 task_events_info_t events_info;
5581 thread_t thread;
5582 uint64_t n_syscalls_mach, n_syscalls_unix, n_csw;
5583
5584 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5585 error = KERN_INVALID_ARGUMENT;
5586 break;
5587 }
5588
5589 events_info = (task_events_info_t) task_info_out;
5590
5591
5592 events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5593 events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5594 events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5595 events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5596 events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5597
5598 n_syscalls_mach = task->syscalls_mach;
5599 n_syscalls_unix = task->syscalls_unix;
5600 n_csw = task->c_switch;
5601
5602 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5603 n_csw += thread->c_switch;
5604 n_syscalls_mach += thread->syscalls_mach;
5605 n_syscalls_unix += thread->syscalls_unix;
5606 }
5607
5608 events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5609 events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5610 events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5611
5612 *task_info_count = TASK_EVENTS_INFO_COUNT;
5613 break;
5614 }
5615 case TASK_AFFINITY_TAG_INFO:
5616 {
5617 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5618 error = KERN_INVALID_ARGUMENT;
5619 break;
5620 }
5621
5622 error = task_affinity_info(task, task_info_out, task_info_count);
5623 break;
5624 }
5625 case TASK_POWER_INFO:
5626 {
5627 if (*task_info_count < TASK_POWER_INFO_COUNT) {
5628 error = KERN_INVALID_ARGUMENT;
5629 break;
5630 }
5631
5632 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5633 break;
5634 }
5635
5636 case TASK_POWER_INFO_V2:
5637 {
5638 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5639 error = KERN_INVALID_ARGUMENT;
5640 break;
5641 }
5642 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5643 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5644 break;
5645 }
5646
5647 case TASK_VM_INFO:
5648 case TASK_VM_INFO_PURGEABLE:
5649 {
5650 task_vm_info_t vm_info;
5651 vm_map_t map;
5652 ledger_amount_t tmp_amount;
5653
5654 struct proc *p;
5655 uint32_t platform, sdk;
5656 p = current_proc();
5657 platform = proc_platform(p);
5658 sdk = proc_sdk(p);
5659 if (original_task_info_count > TASK_VM_INFO_COUNT) {
5660 /*
5661 * Some iOS apps pass an incorrect value for
5662 * task_info_count, expressed in number of bytes
5663 * instead of number of "natural_t" elements, which
5664 * can lead to binary compatibility issues (including
5665 * stack corruption) when the data structure is
5666 * expanded in the future.
5667 * Let's make this potential issue visible by
5668 * logging about it...
5669 */
5670 printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5671 "task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5672 "%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5673 __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5674 flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5675 platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5676 DTRACE_VM4(suspicious_task_vm_info_count,
5677 mach_msg_type_number_t, original_task_info_count,
5678 mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5679 uint32_t, platform,
5680 uint32_t, sdk);
5681 }
5682 #if __arm64__
5683 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5684 platform == PLATFORM_IOS &&
5685 sdk != 0 &&
5686 (sdk >> 16) <= 12) {
5687 /*
5688 * Some iOS apps pass an incorrect value for
5689 * task_info_count, expressed in number of bytes
5690 * instead of number of "natural_t" elements.
5691 * For the sake of backwards binary compatibility
5692 * for apps built with an iOS12 or older SDK and using
5693 * the "rev2" data structure, let's fix task_info_count
5694 * for them, to avoid stomping past the actual end
5695 * of their buffer.
5696 */
5697 #if DEVELOPMENT || DEBUG
5698 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5699 "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5700 proc_name_address(p), original_task_info_count,
5701 TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16),
5702 ((sdk >> 8) & 0xff), (sdk & 0xff));
5703 #endif /* DEVELOPMENT || DEBUG */
5704 DTRACE_VM4(workaround_task_vm_info_count,
5705 mach_msg_type_number_t, original_task_info_count,
5706 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5707 uint32_t, platform,
5708 uint32_t, sdk);
5709 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5710 *task_info_count = original_task_info_count;
5711 }
5712 if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5713 platform == PLATFORM_IOS &&
5714 sdk != 0 &&
5715 (sdk >> 16) <= 15) {
5716 /*
5717 * Some iOS apps pass an incorrect value for
5718 * task_info_count, expressed in number of bytes
5719 * instead of number of "natural_t" elements.
5720 */
5721 printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5722 "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5723 proc_name_address(p), original_task_info_count,
5724 TASK_VM_INFO_REV5_COUNT, platform, (sdk >> 16),
5725 ((sdk >> 8) & 0xff), (sdk & 0xff));
5726 DTRACE_VM4(workaround_task_vm_info_count,
5727 mach_msg_type_number_t, original_task_info_count,
5728 mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5729 uint32_t, platform,
5730 uint32_t, sdk);
5731 #if DEVELOPMENT || DEBUG
5732 /*
5733 * For the sake of internal builds livability,
5734 * work around this user-space bug by capping the
5735 * buffer's size to what it was with the iOS15 SDK.
5736 */
5737 original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5738 *task_info_count = original_task_info_count;
5739 #endif /* DEVELOPMENT || DEBUG */
5740 }
5741 #endif /* __arm64__ */
5742
5743 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5744 error = KERN_INVALID_ARGUMENT;
5745 break;
5746 }
5747
5748 vm_info = (task_vm_info_t)task_info_out;
5749
5750 /*
5751 * Do not hold both the task and map locks,
5752 * so convert the task lock into a map reference,
5753 * drop the task lock, then lock the map.
5754 */
5755 if (is_kernel_task) {
5756 map = kernel_map;
5757 task_unlock(task);
5758 /* no lock, no reference */
5759 } else {
5760 map = task->map;
5761 vm_map_reference(map);
5762 task_unlock(task);
5763 vm_map_lock_read(map);
5764 }
5765
5766 vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5767 vm_info->region_count = map->hdr.nentries;
5768 vm_info->page_size = vm_map_page_size(map);
5769
5770 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5771 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5772
5773 vm_info->device = 0;
5774 vm_info->device_peak = 0;
5775 ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5776 ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5777 ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5778 ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5779 ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5780 ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5781 ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5782 ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5783 ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5784
5785 vm_info->purgeable_volatile_pmap = 0;
5786 vm_info->purgeable_volatile_resident = 0;
5787 vm_info->purgeable_volatile_virtual = 0;
5788 if (is_kernel_task) {
5789 /*
5790 * We do not maintain the detailed stats for the
5791 * kernel_pmap, so just count everything as
5792 * "internal"...
5793 */
5794 vm_info->internal = vm_info->resident_size;
5795 /*
5796 * ... but since the memory held by the VM compressor
5797 * in the kernel address space ought to be attributed
5798 * to user-space tasks, we subtract it from "internal"
5799 * to give memory reporting tools a more accurate idea
5800 * of what the kernel itself is actually using, instead
5801 * of making it look like the kernel is leaking memory
5802 * when the system is under memory pressure.
5803 */
5804 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5805 PAGE_SIZE);
5806 } else {
5807 mach_vm_size_t volatile_virtual_size;
5808 mach_vm_size_t volatile_resident_size;
5809 mach_vm_size_t volatile_compressed_size;
5810 mach_vm_size_t volatile_pmap_size;
5811 mach_vm_size_t volatile_compressed_pmap_size;
5812 kern_return_t kr;
5813
5814 if (flavor == TASK_VM_INFO_PURGEABLE) {
5815 kr = vm_map_query_volatile(
5816 map,
5817 &volatile_virtual_size,
5818 &volatile_resident_size,
5819 &volatile_compressed_size,
5820 &volatile_pmap_size,
5821 &volatile_compressed_pmap_size);
5822 if (kr == KERN_SUCCESS) {
5823 vm_info->purgeable_volatile_pmap =
5824 volatile_pmap_size;
5825 if (radar_20146450) {
5826 vm_info->compressed -=
5827 volatile_compressed_pmap_size;
5828 }
5829 vm_info->purgeable_volatile_resident =
5830 volatile_resident_size;
5831 vm_info->purgeable_volatile_virtual =
5832 volatile_virtual_size;
5833 }
5834 }
5835 }
5836 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5837
5838 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5839 /* must be captured while we still have the map lock */
5840 vm_info->min_address = map->min_offset;
5841 vm_info->max_address = map->max_offset;
5842 }
5843
5844 /*
5845 * Done with vm map things, can drop the map lock and reference,
5846 * and take the task lock back.
5847 *
5848 * Re-validate that the task didn't die on us.
5849 */
5850 if (!is_kernel_task) {
5851 vm_map_unlock_read(map);
5852 vm_map_deallocate(map);
5853 }
5854 map = VM_MAP_NULL;
5855
5856 task_lock(task);
5857
5858 if ((task != current_task()) && (!task->active)) {
5859 error = KERN_INVALID_ARGUMENT;
5860 break;
5861 }
5862
5863 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5864 vm_info->phys_footprint =
5865 (mach_vm_size_t) get_task_phys_footprint(task);
5866 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5867 }
5868 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5869 /* data was captured above */
5870 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5871 }
5872
5873 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5874 ledger_get_lifetime_max(task->ledger,
5875 task_ledgers.phys_footprint,
5876 &vm_info->ledger_phys_footprint_peak);
5877 ledger_get_balance(task->ledger,
5878 task_ledgers.purgeable_nonvolatile,
5879 &vm_info->ledger_purgeable_nonvolatile);
5880 ledger_get_balance(task->ledger,
5881 task_ledgers.purgeable_nonvolatile_compressed,
5882 &vm_info->ledger_purgeable_novolatile_compressed);
5883 ledger_get_balance(task->ledger,
5884 task_ledgers.purgeable_volatile,
5885 &vm_info->ledger_purgeable_volatile);
5886 ledger_get_balance(task->ledger,
5887 task_ledgers.purgeable_volatile_compressed,
5888 &vm_info->ledger_purgeable_volatile_compressed);
5889 ledger_get_balance(task->ledger,
5890 task_ledgers.network_nonvolatile,
5891 &vm_info->ledger_tag_network_nonvolatile);
5892 ledger_get_balance(task->ledger,
5893 task_ledgers.network_nonvolatile_compressed,
5894 &vm_info->ledger_tag_network_nonvolatile_compressed);
5895 ledger_get_balance(task->ledger,
5896 task_ledgers.network_volatile,
5897 &vm_info->ledger_tag_network_volatile);
5898 ledger_get_balance(task->ledger,
5899 task_ledgers.network_volatile_compressed,
5900 &vm_info->ledger_tag_network_volatile_compressed);
5901 ledger_get_balance(task->ledger,
5902 task_ledgers.media_footprint,
5903 &vm_info->ledger_tag_media_footprint);
5904 ledger_get_balance(task->ledger,
5905 task_ledgers.media_footprint_compressed,
5906 &vm_info->ledger_tag_media_footprint_compressed);
5907 ledger_get_balance(task->ledger,
5908 task_ledgers.media_nofootprint,
5909 &vm_info->ledger_tag_media_nofootprint);
5910 ledger_get_balance(task->ledger,
5911 task_ledgers.media_nofootprint_compressed,
5912 &vm_info->ledger_tag_media_nofootprint_compressed);
5913 ledger_get_balance(task->ledger,
5914 task_ledgers.graphics_footprint,
5915 &vm_info->ledger_tag_graphics_footprint);
5916 ledger_get_balance(task->ledger,
5917 task_ledgers.graphics_footprint_compressed,
5918 &vm_info->ledger_tag_graphics_footprint_compressed);
5919 ledger_get_balance(task->ledger,
5920 task_ledgers.graphics_nofootprint,
5921 &vm_info->ledger_tag_graphics_nofootprint);
5922 ledger_get_balance(task->ledger,
5923 task_ledgers.graphics_nofootprint_compressed,
5924 &vm_info->ledger_tag_graphics_nofootprint_compressed);
5925 ledger_get_balance(task->ledger,
5926 task_ledgers.neural_footprint,
5927 &vm_info->ledger_tag_neural_footprint);
5928 ledger_get_balance(task->ledger,
5929 task_ledgers.neural_footprint_compressed,
5930 &vm_info->ledger_tag_neural_footprint_compressed);
5931 ledger_get_balance(task->ledger,
5932 task_ledgers.neural_nofootprint,
5933 &vm_info->ledger_tag_neural_nofootprint);
5934 ledger_get_balance(task->ledger,
5935 task_ledgers.neural_nofootprint_compressed,
5936 &vm_info->ledger_tag_neural_nofootprint_compressed);
5937 *task_info_count = TASK_VM_INFO_REV3_COUNT;
5938 }
5939 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5940 if (get_bsdtask_info(task)) {
5941 vm_info->limit_bytes_remaining =
5942 memorystatus_available_memory_internal(get_bsdtask_info(task));
5943 } else {
5944 vm_info->limit_bytes_remaining = 0;
5945 }
5946 *task_info_count = TASK_VM_INFO_REV4_COUNT;
5947 }
5948 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5949 thread_t thread;
5950 uint64_t total = task->decompressions;
5951 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5952 total += thread->decompressions;
5953 }
5954 vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
5955 *task_info_count = TASK_VM_INFO_REV5_COUNT;
5956 }
5957 if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
5958 ledger_get_balance(task->ledger, task_ledgers.swapins,
5959 &vm_info->ledger_swapins);
5960 *task_info_count = TASK_VM_INFO_REV6_COUNT;
5961 }
5962
5963 break;
5964 }
5965
5966 case TASK_WAIT_STATE_INFO:
5967 {
5968 /*
5969 * Deprecated flavor. Currently allowing some results until all users
5970 * stop calling it. The results may not be accurate.
5971 */
5972 task_wait_state_info_t wait_state_info;
5973 uint64_t total_sfi_ledger_val = 0;
5974
5975 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5976 error = KERN_INVALID_ARGUMENT;
5977 break;
5978 }
5979
5980 wait_state_info = (task_wait_state_info_t) task_info_out;
5981
5982 wait_state_info->total_wait_state_time = 0;
5983 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5984
5985 #if CONFIG_SCHED_SFI
5986 int i, prev_lentry = -1;
5987 int64_t val_credit, val_debit;
5988
5989 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5990 val_credit = 0;
5991 /*
5992 * checking with prev_lentry != entry ensures adjacent classes
5993 * which share the same ledger do not add wait times twice.
5994 * Note: Use ledger() call to get data for each individual sfi class.
5995 */
5996 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5997 KERN_SUCCESS == ledger_get_entries(task->ledger,
5998 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5999 total_sfi_ledger_val += val_credit;
6000 }
6001 prev_lentry = task_ledgers.sfi_wait_times[i];
6002 }
6003
6004 #endif /* CONFIG_SCHED_SFI */
6005 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
6006 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
6007
6008 break;
6009 }
6010 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
6011 {
6012 #if DEVELOPMENT || DEBUG
6013 pvm_account_info_t acnt_info;
6014
6015 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
6016 error = KERN_INVALID_ARGUMENT;
6017 break;
6018 }
6019
6020 if (task_info_out == NULL) {
6021 error = KERN_INVALID_ARGUMENT;
6022 break;
6023 }
6024
6025 acnt_info = (pvm_account_info_t) task_info_out;
6026
6027 error = vm_purgeable_account(task, acnt_info);
6028
6029 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
6030
6031 break;
6032 #else /* DEVELOPMENT || DEBUG */
6033 error = KERN_NOT_SUPPORTED;
6034 break;
6035 #endif /* DEVELOPMENT || DEBUG */
6036 }
6037 case TASK_FLAGS_INFO:
6038 {
6039 task_flags_info_t flags_info;
6040
6041 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
6042 error = KERN_INVALID_ARGUMENT;
6043 break;
6044 }
6045
6046 flags_info = (task_flags_info_t)task_info_out;
6047
6048 /* only publish the 64-bit flag of the task */
6049 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
6050
6051 *task_info_count = TASK_FLAGS_INFO_COUNT;
6052 break;
6053 }
6054
6055 case TASK_DEBUG_INFO_INTERNAL:
6056 {
6057 #if DEVELOPMENT || DEBUG
6058 task_debug_info_internal_t dbg_info;
6059 ipc_space_t space = task->itk_space;
6060 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
6061 error = KERN_NOT_SUPPORTED;
6062 break;
6063 }
6064
6065 if (task_info_out == NULL) {
6066 error = KERN_INVALID_ARGUMENT;
6067 break;
6068 }
6069 dbg_info = (task_debug_info_internal_t) task_info_out;
6070 dbg_info->ipc_space_size = 0;
6071
6072 if (space) {
6073 smr_ipc_enter();
6074 ipc_entry_table_t table = smr_entered_load(&space->is_table);
6075 if (table) {
6076 dbg_info->ipc_space_size =
6077 ipc_entry_table_count(table);
6078 }
6079 smr_ipc_leave();
6080 }
6081
6082 dbg_info->suspend_count = task->suspend_count;
6083
6084 error = KERN_SUCCESS;
6085 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
6086 break;
6087 #else /* DEVELOPMENT || DEBUG */
6088 error = KERN_NOT_SUPPORTED;
6089 break;
6090 #endif /* DEVELOPMENT || DEBUG */
6091 }
6092 case TASK_SUSPEND_STATS_INFO:
6093 {
6094 #if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6095 if (*task_info_count < TASK_SUSPEND_STATS_INFO_COUNT || task_info_out == NULL) {
6096 error = KERN_INVALID_ARGUMENT;
6097 break;
6098 }
6099 error = _task_get_suspend_stats_locked(task, (task_suspend_stats_t)task_info_out);
6100 *task_info_count = TASK_SUSPEND_STATS_INFO_COUNT;
6101 break;
6102 #else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6103 error = KERN_NOT_SUPPORTED;
6104 break;
6105 #endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6106 }
6107 case TASK_SUSPEND_SOURCES_INFO:
6108 {
6109 #if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6110 if (*task_info_count < TASK_SUSPEND_SOURCES_INFO_COUNT || task_info_out == NULL) {
6111 error = KERN_INVALID_ARGUMENT;
6112 break;
6113 }
6114 error = _task_get_suspend_sources_locked(task, (task_suspend_source_t)task_info_out);
6115 *task_info_count = TASK_SUSPEND_SOURCES_INFO_COUNT;
6116 break;
6117 #else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6118 error = KERN_NOT_SUPPORTED;
6119 break;
6120 #endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6121 }
6122 default:
6123 error = KERN_INVALID_ARGUMENT;
6124 }
6125
6126 task_unlock(task);
6127 return error;
6128 }
6129
6130 /*
6131 * task_info_from_user
6132 *
6133 * When calling task_info from user space,
6134 * this function will be executed as mig server side
6135 * instead of calling directly into task_info.
6136 * This gives the possibility to perform more security
6137 * checks on task_port.
6138 *
6139 * In the case of TASK_DYLD_INFO, we require the more
6140 * privileged task_read_port not the less-privileged task_name_port.
6141 *
6142 */
6143 kern_return_t
task_info_from_user(mach_port_t task_port,task_flavor_t flavor,task_info_t task_info_out,mach_msg_type_number_t * task_info_count)6144 task_info_from_user(
6145 mach_port_t task_port,
6146 task_flavor_t flavor,
6147 task_info_t task_info_out,
6148 mach_msg_type_number_t *task_info_count)
6149 {
6150 task_t task;
6151 kern_return_t ret;
6152
6153 if (flavor == TASK_DYLD_INFO) {
6154 task = convert_port_to_task_read(task_port);
6155 } else {
6156 task = convert_port_to_task_name(task_port);
6157 }
6158
6159 ret = task_info(task, flavor, task_info_out, task_info_count);
6160
6161 task_deallocate(task);
6162
6163 return ret;
6164 }
6165
6166 /*
6167 * Routine: task_dyld_process_info_update_helper
6168 *
6169 * Release send rights in release_ports.
6170 *
6171 * If no active ports found in task's dyld notifier array, unset the magic value
6172 * in user space to indicate so.
6173 *
6174 * Condition:
6175 * task's itk_lock is locked, and is unlocked upon return.
6176 * Global g_dyldinfo_mtx is locked, and is unlocked upon return.
6177 */
void
task_dyld_process_info_update_helper(
	task_t                  task,
	size_t                  active_count,
	vm_map_address_t        magic_addr, /* a userspace address */
	ipc_port_t             *release_ports,
	size_t                  release_count)
{
	void *notifiers_ptr = NULL;

	/* release_ports[] is a fixed-capacity array on the caller's stack */
	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);

	if (active_count == 0) {
		/*
		 * No live notifier ports remain: detach and free the whole
		 * array, then clear the user-space magic word so dyld knows
		 * nothing is registered anymore.
		 */
		assert(task->itk_dyld_notify != NULL);
		notifiers_ptr = task->itk_dyld_notify;
		task->itk_dyld_notify = NULL;
		itk_unlock(task);

		kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
		(void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
	} else {
		itk_unlock(task);
		/* at least one notifier is still registered: (re)assert the magic */
		(void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
		    magic_addr); /* reset magic */
	}

	/* both locks handed to us by the caller are now dropped (see contract above) */
	lck_mtx_unlock(&g_dyldinfo_mtx);

	/* send rights are released only after all locks are dropped */
	for (size_t i = 0; i < release_count; i++) {
		ipc_port_release_send(release_ports[i]);
	}
}
6210
6211 /*
6212 * Routine: task_dyld_process_info_notify_register
6213 *
6214 * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
6215 * memory for the array if it's the first port to be registered. Also cleanup
6216 * any dead rights found in the array.
6217 *
6218 * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
6219 *
6220 * Args:
6221 * task: Target task for the registration.
6222 * sright: A send right.
6223 *
6224 * Returns:
6225 * KERN_SUCCESS: Registration succeeded.
6226 * KERN_INVALID_TASK: task is invalid.
6227 * KERN_INVALID_RIGHT: sright is invalid.
6228 * KERN_DENIED: Security policy denied this call.
6229 * KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
6230 * KERN_NO_SPACE: No available notifier port slot left for this task.
6231 * KERN_RIGHT_EXISTS: The notifier port is already registered and active.
6232 *
6233 * Other error code see task_info().
6234 *
6235 * See Also:
6236 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6237 */
kern_return_t
task_dyld_process_info_notify_register(
	task_t                  task,
	ipc_port_t              sright)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	mach_vm_address_t ports_addr; /* a user space address */
	kern_return_t kr;
	boolean_t right_exists = false;
	ipc_port_t *notifiers_ptr = NULL;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!IP_VALID(sright)) {
		return KERN_INVALID_RIGHT;
	}

#if CONFIG_MACF
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* locate the task's all_image_infos to find the magic word's address */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	/* pick the 32- vs 64-bit layout of dyld's all_image_infos structure */
	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/*
	 * Speculatively allocate the notifier array before taking any locks:
	 * Z_WAITOK means this may block, which must not happen while holding
	 * itk_lock or g_dyldinfo_mtx.
	 */
	if (task->itk_dyld_notify == NULL) {
		notifiers_ptr = kalloc_type(ipc_port_t,
		    DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	}

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	/* install the speculative allocation only if nobody raced us */
	if (task->itk_dyld_notify == NULL) {
		task->itk_dyld_notify = notifiers_ptr;
		notifiers_ptr = NULL;
	}

	assert(task->itk_dyld_notify != NULL);
	/* First pass: clear dead names and check for duplicate registration */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
			/* dead right: queue it for release after the locks drop */
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		} else if (*portp == sright) {
			/* the port is already registered and is active */
			right_exists = true;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	if (right_exists) {
		/* skip second pass */
		kr = KERN_RIGHT_EXISTS;
		goto out;
	}

	/* Second pass: register the port */
	kr = KERN_NO_SPACE;
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == IPC_PORT_NULL) {
			*portp = sright;	/* consumes the caller's send right */
			active_count++;
			kr = KERN_SUCCESS;
			break;
		}
	}

out:
	assert(active_count > 0);

	/* unlocks both locks, updates the user-space magic, releases dead rights */
	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	/* NULL here if our allocation was installed above; freeing NULL is a no-op */
	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);

	return kr;
}
6340
6341 /*
6342 * Routine: task_dyld_process_info_notify_deregister
6343 *
6344 * Remove a send right in target task's itk_dyld_notify array matching the receive
6345 * right name passed in. Deallocate kernel memory for the array if it's the last port to
6346 * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
6347 *
6348 * Does not consume any reference.
6349 *
6350 * Args:
6351 * task: Target task for the deregistration.
6352 * rcv_name: The name denoting the receive right in caller's space.
6353 *
6354 * Returns:
 * KERN_SUCCESS: A matching entry found and deregistration succeeded.
6356 * KERN_INVALID_TASK: task is invalid.
6357 * KERN_INVALID_NAME: name is invalid.
6358 * KERN_DENIED: Security policy denied this call.
6359 * KERN_FAILURE: A matching entry is not found.
6360 * KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6361 *
6362 * Other error code see task_info().
6363 *
6364 * See Also:
6365 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6366 */
kern_return_t
task_dyld_process_info_notify_deregister(
	task_t                  task,
	mach_port_name_t        rcv_name)
{
	struct task_dyld_info dyld_info;
	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
	uint32_t release_count = 0, active_count = 0;
	boolean_t port_found = false;
	mach_vm_address_t ports_addr; /* a user space address */
	ipc_port_t sright;
	kern_return_t kr;
	ipc_port_t *portp;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_TASK;
	}

	if (!MACH_PORT_VALID(rcv_name)) {
		return KERN_INVALID_NAME;
	}

#if CONFIG_MACF
	/* NOTE: deregistration is gated by the same MACF check as registration */
	if (mac_task_check_dyld_process_info_notify_register()) {
		return KERN_DENIED;
	}
#endif

	/* locate the task's all_image_infos to find the magic word's address */
	kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
	if (kr) {
		return kr;
	}

	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
	} else {
		ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
		    offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
	}

	/* resolve the caller's receive-right name to the underlying port */
	kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
	if (kr) {
		return KERN_INVALID_RIGHT;
	}

	/* take our own reference so the port stays valid once its lock drops */
	ip_reference(sright);
	ip_mq_unlock(sright);

	assert(sright != IPC_PORT_NULL);

	lck_mtx_lock(&g_dyldinfo_mtx);
	itk_lock(task);

	if (task->itk_dyld_notify == NULL) {
		/* nothing is registered (or the array was already torn down) */
		itk_unlock(task);
		lck_mtx_unlock(&g_dyldinfo_mtx);
		ip_release(sright);
		return KERN_FAILURE;
	}

	/* single pass: drop the matching entry, plus any dead rights found */
	for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
		portp = &task->itk_dyld_notify[slot];
		if (*portp == sright) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
			port_found = true;
		} else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
			release_ports[release_count++] = *portp;
			*portp = IPC_PORT_NULL;
		}

		if (*portp != IPC_PORT_NULL) {
			active_count++;
		}
	}

	/* unlocks both locks; frees the array and unsets the magic if none active */
	task_dyld_process_info_update_helper(task, active_count,
	    (vm_map_address_t)ports_addr, release_ports, release_count);
	/* itk_lock, g_dyldinfo_mtx are unlocked upon return */

	ip_release(sright);

	return port_found ? KERN_SUCCESS : KERN_FAILURE;
}
6453
6454 /*
6455 * task_power_info
6456 *
6457 * Returns power stats for the task.
6458 * Note: Called with task locked.
6459 */
void
task_power_info_locked(
	task_t                        task,
	task_power_info_t             info,
	gpu_energy_data_t             ginfo,
	task_power_info_v2_t          infov2,
	struct task_power_info_extra *extra_info)
{
	thread_t thread;
	ledger_amount_t tmp;

	uint64_t runnable_time_sum = 0;

	/* contract: caller holds the task lock for the whole call */
	task_lock_assert_owned(task);

	/* wakeup counts come from the task ledgers (credit side; debit discarded) */
	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	/* start from the task-level totals; live threads are folded in below */
	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	struct recount_usage usage = { 0 };
	struct recount_usage usage_perf = { 0 };
	recount_task_usage_perf_only(task, &usage, &usage_perf);

	info->total_user = usage.ru_user_time_mach;
	info->total_system = usage.ru_system_time_mach;
	runnable_time_sum = task->total_runnable_time;

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		/* time attributed to the "perf" power level (user + system) */
		infov2->task_ptime = usage_perf.ru_system_time_mach +
		    usage_perf.ru_user_time_mach;
		infov2->task_pset_switches = task->ps_switch;
#if CONFIG_PERVASIVE_ENERGY
		infov2->task_energy = usage.ru_energy_nj;
#endif /* CONFIG_PERVASIVE_ENERGY */
	}

	/* fold in each live, non-idle thread's contribution */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;

		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		/* per-thread counters are read under thread lock at splsched */
		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

		if (infov2) {
			infov2->task_pset_switches += thread->ps_switch;
		}

		runnable_time_sum += timer_grab(&thread->runnable_timer);

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}

	/* optional extended stats (runnable time, CPI, energy) */
	if (extra_info) {
		extra_info->runnable_time = runnable_time_sum;
#if CONFIG_PERVASIVE_CPI
		extra_info->cycles = usage.ru_cycles;
		extra_info->instructions = usage.ru_instructions;
		extra_info->pcycles = usage_perf.ru_cycles;
		extra_info->pinstructions = usage_perf.ru_instructions;
		extra_info->user_ptime = usage_perf.ru_user_time_mach;
		extra_info->system_ptime = usage_perf.ru_system_time_mach;
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
		extra_info->energy = usage.ru_energy_nj;
		extra_info->penergy = usage_perf.ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	}
}
6546
6547 /*
6548 * task_gpu_utilisation
6549 *
 * Returns the total gpu time used by all the threads of the task
6551 * (both dead and alive)
6552 */
6553 uint64_t
task_gpu_utilisation(task_t task)6554 task_gpu_utilisation(
6555 task_t task)
6556 {
6557 uint64_t gpu_time = 0;
6558 #if defined(__x86_64__)
6559 thread_t thread;
6560
6561 task_lock(task);
6562 gpu_time += task->task_gpu_ns;
6563
6564 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6565 spl_t x;
6566 x = splsched();
6567 thread_lock(thread);
6568 gpu_time += ml_gpu_stat(thread);
6569 thread_unlock(thread);
6570 splx(x);
6571 }
6572
6573 task_unlock(task);
6574 #else /* defined(__x86_64__) */
6575 /* silence compiler warning */
6576 (void)task;
6577 #endif /* defined(__x86_64__) */
6578 return gpu_time;
6579 }
6580
6581 /* This function updates the cpu time in the arrays for each
6582 * effective and requested QoS class
6583 */
void
task_update_cpu_time_qos_stats(
	task_t          task,
	uint64_t        *eqos_stats,
	uint64_t        *rqos_stats)
{
	/* nothing to accumulate into: bail before taking the task lock */
	if (!eqos_stats && !rqos_stats) {
		return;
	}

	task_lock(task);
	thread_t thread;
	/* flush each live thread's pending QoS CPU time into the task totals */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		thread_update_qos_cpu_time(thread);
	}

	/* accumulate per-effective-QoS-class times (indexed by THREAD_QOS_*) */
	if (eqos_stats) {
		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
	}

	/* accumulate per-requested-QoS-class times (same indexing scheme) */
	if (rqos_stats) {
		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
	}

	task_unlock(task);
}
6626
6627 kern_return_t
task_purgable_info(task_t task,task_purgable_info_t * stats)6628 task_purgable_info(
6629 task_t task,
6630 task_purgable_info_t *stats)
6631 {
6632 if (task == TASK_NULL || stats == NULL) {
6633 return KERN_INVALID_ARGUMENT;
6634 }
6635 /* Take task reference */
6636 task_reference(task);
6637 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6638 /* Drop task reference */
6639 task_deallocate(task);
6640 return KERN_SUCCESS;
6641 }
6642
6643 void
task_vtimer_set(task_t task,integer_t which)6644 task_vtimer_set(
6645 task_t task,
6646 integer_t which)
6647 {
6648 thread_t thread;
6649 spl_t x;
6650
6651 task_lock(task);
6652
6653 task->vtimers |= which;
6654
6655 switch (which) {
6656 case TASK_VTIMER_USER:
6657 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6658 x = splsched();
6659 thread_lock(thread);
6660 struct recount_times_mach times = recount_thread_times(thread);
6661 thread->vtimer_user_save = times.rtm_user;
6662 thread_unlock(thread);
6663 splx(x);
6664 }
6665 break;
6666
6667 case TASK_VTIMER_PROF:
6668 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6669 x = splsched();
6670 thread_lock(thread);
6671 thread->vtimer_prof_save = recount_thread_time_mach(thread);
6672 thread_unlock(thread);
6673 splx(x);
6674 }
6675 break;
6676
6677 case TASK_VTIMER_RLIM:
6678 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6679 x = splsched();
6680 thread_lock(thread);
6681 thread->vtimer_rlim_save = recount_thread_time_mach(thread);
6682 thread_unlock(thread);
6683 splx(x);
6684 }
6685 break;
6686 }
6687
6688 task_unlock(task);
6689 }
6690
void
task_vtimer_clear(
	task_t          task,
	integer_t       which)
{
	/*
	 * Disarm the given vtimer flag(s).  Per-thread baseline snapshots
	 * are left as-is; they are re-taken by a future task_vtimer_set().
	 */
	task_lock(task);

	task->vtimers &= ~which;

	task_unlock(task);
}
6702
void
task_vtimer_update(
	__unused
	task_t          task,
	integer_t       which,
	uint32_t        *microsecs)
{
	/*
	 * Report (in *microsecs) how much time the current thread has
	 * accumulated for the given vtimer since the last update, and
	 * advance the saved baseline.  Only meaningful for the current
	 * task (asserted below); operates on the current thread only.
	 *
	 * NOTE(review): despite the __unused annotation, 'task' is read
	 * (task->vtimers) and asserted against current_task().
	 */
	thread_t thread = current_thread();
	uint32_t tdelt = 0;
	clock_sec_t secs = 0;
	uint64_t tsum;

	assert(task == current_task());

	spl_t s = splsched();
	thread_lock(thread);

	/* timer not (fully) armed: leave *microsecs untouched */
	if ((task->vtimers & which) != (uint32_t)which) {
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {
	case TASK_VTIMER_USER:;
		/* delta of user-mode time; truncated to 32 bits of mach time */
		struct recount_times_mach times = recount_thread_times(thread);
		tsum = times.rtm_user;
		tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
		thread->vtimer_user_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0) {
			thread->vtimer_prof_save = tsum;
		}
		break;

	case TASK_VTIMER_RLIM:
		tsum = recount_current_thread_time_mach();
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}
6756
/*
 * Return the dispatch-queue offset previously registered on the task.
 */
uint64_t
get_task_dispatchqueue_offset(
	task_t task)
{
	return task->dispatchqueue_offset;
}
6763
/*
 * Tear down all synchronizers owned by the task (currently: semaphores).
 */
void
task_synchronizer_destroy_all(task_t task)
{
	/*
	 * Destroy owned semaphores
	 */
	semaphore_destroy_all(task);
}
6772
6773 /*
6774 * Install default (machine-dependent) initial thread state
6775 * on the task. Subsequent thread creation will have this initial
6776 * state set on the thread by machine_thread_inherit_taskwide().
6777 * Flavors and structures are exactly the same as those to thread_set_state()
6778 */
6779 kern_return_t
task_set_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t state_count)6780 task_set_state(
6781 task_t task,
6782 int flavor,
6783 thread_state_t state,
6784 mach_msg_type_number_t state_count)
6785 {
6786 kern_return_t ret;
6787
6788 if (task == TASK_NULL) {
6789 return KERN_INVALID_ARGUMENT;
6790 }
6791
6792 task_lock(task);
6793
6794 if (!task->active) {
6795 task_unlock(task);
6796 return KERN_FAILURE;
6797 }
6798
6799 ret = machine_task_set_state(task, flavor, state, state_count);
6800
6801 task_unlock(task);
6802 return ret;
6803 }
6804
6805 /*
6806 * Examine the default (machine-dependent) initial thread state
6807 * on the task, as set by task_set_state(). Flavors and structures
6808 * are exactly the same as those passed to thread_get_state().
6809 */
6810 kern_return_t
task_get_state(task_t task,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)6811 task_get_state(
6812 task_t task,
6813 int flavor,
6814 thread_state_t state,
6815 mach_msg_type_number_t *state_count)
6816 {
6817 kern_return_t ret;
6818
6819 if (task == TASK_NULL) {
6820 return KERN_INVALID_ARGUMENT;
6821 }
6822
6823 task_lock(task);
6824
6825 if (!task->active) {
6826 task_unlock(task);
6827 return KERN_FAILURE;
6828 }
6829
6830 ret = machine_task_get_state(task, flavor, state, state_count);
6831
6832 task_unlock(task);
6833 return ret;
6834 }
6835
6836
/*
 * Deliver an EXC_GUARD exception (with a corpse) against the current task.
 * Shields the task from jetsam for the duration of the enqueue so the
 * corpse can be collected.  Returns KERN_NOT_SUPPORTED for initproc.
 */
static kern_return_t __attribute__((noinline, not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	boolean_t backtrace_only)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid()) {
		return KERN_NOT_SUPPORTED; // initproc is immune
	}
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;
	void *bsd_info = get_bsdtask_info(task);

	/* (See jetsam-related comments below) */

	/* Skip jetsam while the corpse exception is pending, then re-arm. */
	proc_memstat_skip(bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason, backtrace_only);
	proc_memstat_skip(bsd_info, FALSE);
	return kr;
}
6864
/*
 * Public entry point for raising an EXC_GUARD on the current task;
 * forwards all arguments unchanged to the sender above.
 */
kern_return_t
task_violated_guard(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason,
	bool backtrace_only)
{
	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
}
6874
6875
6876 #if CONFIG_MEMORYSTATUS
6877
6878 boolean_t
task_get_memlimit_is_active(task_t task)6879 task_get_memlimit_is_active(task_t task)
6880 {
6881 assert(task != NULL);
6882
6883 if (task->memlimit_is_active == 1) {
6884 return TRUE;
6885 } else {
6886 return FALSE;
6887 }
6888 }
6889
6890 void
task_set_memlimit_is_active(task_t task,boolean_t memlimit_is_active)6891 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6892 {
6893 assert(task != NULL);
6894
6895 if (memlimit_is_active) {
6896 task->memlimit_is_active = 1;
6897 } else {
6898 task->memlimit_is_active = 0;
6899 }
6900 }
6901
6902 boolean_t
task_get_memlimit_is_fatal(task_t task)6903 task_get_memlimit_is_fatal(task_t task)
6904 {
6905 assert(task != NULL);
6906
6907 if (task->memlimit_is_fatal == 1) {
6908 return TRUE;
6909 } else {
6910 return FALSE;
6911 }
6912 }
6913
6914 void
task_set_memlimit_is_fatal(task_t task,boolean_t memlimit_is_fatal)6915 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6916 {
6917 assert(task != NULL);
6918
6919 if (memlimit_is_fatal) {
6920 task->memlimit_is_fatal = 1;
6921 } else {
6922 task->memlimit_is_fatal = 0;
6923 }
6924 }
6925
/* Read the task's memstat dirty-start timestamp (set via task_set_dirty_start). */
uint64_t
task_get_dirty_start(task_t task)
{
	return task->memstat_dirty_start;
}
6931
/* Store the task's memstat dirty-start timestamp under the task lock. */
void
task_set_dirty_start(task_t task, uint64_t start)
{
	task_lock(task);
	task->memstat_dirty_start = start;
	task_unlock(task);
}
6939
6940 boolean_t
task_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6941 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6942 {
6943 boolean_t triggered = FALSE;
6944
6945 assert(task == current_task());
6946
6947 /*
6948 * Returns true, if task has already triggered an exc_resource exception.
6949 */
6950
6951 if (memlimit_is_active) {
6952 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6953 } else {
6954 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6955 }
6956
6957 return triggered;
6958 }
6959
6960 void
task_mark_has_triggered_exc_resource(task_t task,boolean_t memlimit_is_active)6961 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6962 {
6963 assert(task == current_task());
6964
6965 /*
6966 * We allow one exc_resource per process per active/inactive limit.
6967 * The limit's fatal attribute does not come into play.
6968 */
6969
6970 if (memlimit_is_active) {
6971 task->memlimit_active_exc_resource = 1;
6972 } else {
6973 task->memlimit_inactive_exc_resource = 1;
6974 }
6975 }
6976
6977 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6978
6979 void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb,send_exec_resource_options_t exception_options)6980 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options)
6981 {
6982 task_t task = current_task();
6983 int pid = 0;
6984 const char *procname = "unknown";
6985 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6986 boolean_t send_sync_exc_resource = FALSE;
6987 void *cur_bsd_info = get_bsdtask_info(current_task());
6988
6989 #ifdef MACH_BSD
6990 pid = proc_selfpid();
6991
6992 if (pid == 1) {
6993 /*
6994 * Cannot have ReportCrash analyzing
6995 * a suspended initproc.
6996 */
6997 return;
6998 }
6999
7000 if (cur_bsd_info != NULL) {
7001 procname = proc_name_address(cur_bsd_info);
7002 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(cur_bsd_info);
7003 }
7004 #endif
7005 #if CONFIG_COREDUMP
7006 if (hwm_user_cores) {
7007 int error;
7008 uint64_t starttime, end;
7009 clock_sec_t secs = 0;
7010 uint32_t microsecs = 0;
7011
7012 starttime = mach_absolute_time();
7013 /*
7014 * Trigger a coredump of this process. Don't proceed unless we know we won't
7015 * be filling up the disk; and ignore the core size resource limit for this
7016 * core file.
7017 */
7018 if ((error = coredump(cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
7019 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
7020 }
7021 /*
7022 * coredump() leaves the task suspended.
7023 */
7024 task_resume_internal(current_task());
7025
7026 end = mach_absolute_time();
7027 absolutetime_to_microtime(end - starttime, &secs, µsecs);
7028 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
7029 proc_name_address(cur_bsd_info), pid, (int)secs, microsecs);
7030 }
7031 #endif /* CONFIG_COREDUMP */
7032
7033 if (disable_exc_resource) {
7034 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
7035 "suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
7036 return;
7037 }
7038 printf("process %s [%d] crossed memory %s (%d MB); EXC_RESOURCE "
7039 "\n", procname, pid, (!(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? "high watermark" : "diagnostics limit"), max_footprint_mb);
7040
7041 /*
7042 * A task that has triggered an EXC_RESOURCE, should not be
7043 * jetsammed when the device is under memory pressure. Here
7044 * we set the P_MEMSTAT_SKIP flag so that the process
7045 * will be skipped if the memorystatus_thread wakes up.
7046 *
7047 * This is a debugging aid to ensure we can get a corpse before
7048 * the jetsam thread kills the process.
7049 * Note that proc_memstat_skip is a no-op on release kernels.
7050 */
7051 proc_memstat_skip(cur_bsd_info, TRUE);
7052
7053 code[0] = code[1] = 0;
7054 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
7055 /*
7056 * Regardless if there was a diag memlimit violation, fatal exceptions shall be notified always
7057 * as high level watermaks. In another words, if there was a diag limit and a watermark, and the
7058 * violation if for limit watermark, a watermark shall be reported.
7059 */
7060 if (!(exception_options & EXEC_RESOURCE_FATAL)) {
7061 EXC_RESOURCE_ENCODE_FLAVOR(code[0], !(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? FLAVOR_HIGH_WATERMARK : FLAVOR_DIAG_MEMLIMIT);
7062 } else {
7063 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK );
7064 }
7065 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
7066 /*
7067 * Do not generate a corpse fork if the violation is a fatal one
7068 * or the process wants synchronous EXC_RESOURCE exceptions.
7069 */
7070 if ((exception_options & EXEC_RESOURCE_FATAL) || send_sync_exc_resource || !exc_via_corpse_forking) {
7071 if (exception_options & EXEC_RESOURCE_FATAL) {
7072 vm_map_set_corpse_source(task->map);
7073 }
7074
7075 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
7076 if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
7077 /*
7078 * Use the _internal_ variant so that no user-space
7079 * process can resume our task from under us.
7080 */
7081 task_suspend_internal(task);
7082 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7083 task_resume_internal(task);
7084 }
7085 } else {
7086 if (disable_exc_resource_during_audio && audio_active) {
7087 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
7088 "suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
7089 } else {
7090 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
7091 code, EXCEPTION_CODE_MAX, NULL, FALSE);
7092 }
7093 }
7094
7095 /*
7096 * After the EXC_RESOURCE has been handled, we must clear the
7097 * P_MEMSTAT_SKIP flag so that the process can again be
7098 * considered for jetsam if the memorystatus_thread wakes up.
7099 */
7100 proc_memstat_skip(cur_bsd_info, FALSE); /* clear the flag */
7101 }
7102 /*
7103 * Callback invoked when a task exceeds its physical footprint limit.
7104 */
7105 void
task_footprint_exceeded(int warning,__unused const void * param0,__unused const void * param1)7106 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
7107 {
7108 ledger_amount_t max_footprint = 0;
7109 ledger_amount_t max_footprint_mb = 0;
7110 #if DEBUG || DEVELOPMENT
7111 ledger_amount_t diag_threshold_limit_mb = 0;
7112 ledger_amount_t diag_threshold_limit = 0;
7113 #endif
7114 #if CONFIG_DEFERRED_RECLAIM
7115 ledger_amount_t current_footprint;
7116 #endif /* CONFIG_DEFERRED_RECLAIM */
7117 task_t task;
7118 send_exec_resource_is_warning is_warning = IS_NOT_WARNING;
7119 boolean_t memlimit_is_active;
7120 send_exec_resource_is_fatal memlimit_is_fatal;
7121 send_exec_resource_is_diagnostics is_diag_mem_threshold = IS_NOT_DIAGNOSTICS;
7122 if (warning == LEDGER_WARNING_DIAG_MEM_THRESHOLD) {
7123 is_diag_mem_threshold = IS_DIAGNOSTICS;
7124 is_warning = IS_WARNING;
7125 } else if (warning == LEDGER_WARNING_DIPPED_BELOW) {
7126 /*
7127 * Task memory limits only provide a warning on the way up.
7128 */
7129 return;
7130 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7131 /*
7132 * This task is in danger of violating a memory limit,
7133 * It has exceeded a percentage level of the limit.
7134 */
7135 is_warning = IS_WARNING;
7136 } else {
7137 /*
7138 * The task has exceeded the physical footprint limit.
7139 * This is not a warning but a true limit violation.
7140 */
7141 is_warning = IS_NOT_WARNING;
7142 }
7143
7144 task = current_task();
7145
7146 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
7147 #if DEBUG || DEVELOPMENT
7148 ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &diag_threshold_limit);
7149 #endif
7150 #if CONFIG_DEFERRED_RECLAIM
7151 if (task->deferred_reclamation_metadata != NULL) {
7152 /*
7153 * Task is enrolled in deferred reclamation.
7154 * Do a reclaim to ensure it's really over its limit.
7155 */
7156 vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
7157 ledger_get_balance(task->ledger, task_ledgers.phys_footprint, ¤t_footprint);
7158 if (current_footprint < max_footprint) {
7159 return;
7160 }
7161 }
7162 #endif /* CONFIG_DEFERRED_RECLAIM */
7163 max_footprint_mb = max_footprint >> 20;
7164 #if DEBUG || DEVELOPMENT
7165 diag_threshold_limit_mb = diag_threshold_limit >> 20;
7166 #endif
7167 memlimit_is_active = task_get_memlimit_is_active(task);
7168 memlimit_is_fatal = task_get_memlimit_is_fatal(task) == FALSE ? IS_NOT_FATAL : IS_FATAL;
7169 #if DEBUG || DEVELOPMENT
7170 if (is_diag_mem_threshold == IS_NOT_DIAGNOSTICS) {
7171 task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
7172 } else {
7173 task_process_crossed_limit_diag(diag_threshold_limit_mb);
7174 }
7175 #else
7176 task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
7177 #endif
7178 }
7179
/*
 * Actions to perform when a process has crossed a watermark or committed
 * a fatal memory-limit violation (non-diagnostics path).
 */
static inline void
task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning)
{
	send_exec_resource_options_t exception_options = 0;
	if (memlimit_is_fatal) {
		exception_options |= EXEC_RESOURCE_FATAL;
	}
	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * We only generate the exception once per process per memlimit (active/inactive limit).
	 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
	 * and we disable it by marking that memlimit as exception triggered.
	 */
	if (is_warning == IS_NOT_WARNING && !task_has_triggered_exc_resource(task, memlimit_is_active)) {
		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
		// If it was not a diag threshold (if it was a memory limit), we do not want more signalling;
		// however, if it was a diag limit, the user may load a different limit and signal the violation again
		memorystatus_log_exception((int)ledger_limit_size, memlimit_is_active, memlimit_is_fatal);
		task_mark_has_triggered_exc_resource(task, memlimit_is_active);
	}
	/* Always notify memorystatus, even when no exception was raised. */
	memorystatus_on_ledger_footprint_exceeded(is_warning == IS_NOT_WARNING ? FALSE : TRUE, memlimit_is_active, memlimit_is_fatal);
}
7204
7205 #if DEBUG || DEVELOPMENT
/**
 * Actions to take when a process has crossed the diagnostics limit.
 */
static inline void
task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size)
{
	/*
	 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
	 * In the case of the diagnostics thresholds, the exception will be signaled only once, but the
	 * inhibit / rearm mechanism is performed at ledger level.
	 */
	send_exec_resource_options_t exception_options = EXEC_RESOURCE_DIAGNOSTIC;
	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
	memorystatus_log_diag_threshold_exception((int)ledger_limit_size);
}
7221 #endif
7222
7223 extern int proc_check_footprint_priv(void);
7224
7225 kern_return_t
task_set_phys_footprint_limit(task_t task,int new_limit_mb,int * old_limit_mb)7226 task_set_phys_footprint_limit(
7227 task_t task,
7228 int new_limit_mb,
7229 int *old_limit_mb)
7230 {
7231 kern_return_t error;
7232
7233 boolean_t memlimit_is_active;
7234 boolean_t memlimit_is_fatal;
7235
7236 if ((error = proc_check_footprint_priv())) {
7237 return KERN_NO_ACCESS;
7238 }
7239
7240 /*
7241 * This call should probably be obsoleted.
7242 * But for now, we default to current state.
7243 */
7244 memlimit_is_active = task_get_memlimit_is_active(task);
7245 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
7246
7247 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
7248 }
7249
7250 /*
7251 * Set the limit of diagnostics memory consumption for a concrete task
7252 */
7253 #if CONFIG_MEMORYSTATUS
7254 #if DEVELOPMENT || DEBUG
7255 kern_return_t
task_set_diag_footprint_limit(task_t task,uint64_t new_limit_mb,uint64_t * old_limit_mb)7256 task_set_diag_footprint_limit(
7257 task_t task,
7258 uint64_t new_limit_mb,
7259 uint64_t *old_limit_mb)
7260 {
7261 kern_return_t error;
7262
7263 if ((error = proc_check_footprint_priv())) {
7264 return KERN_NO_ACCESS;
7265 }
7266
7267 return task_set_diag_footprint_limit_internal(task, new_limit_mb, old_limit_mb);
7268 }
7269
7270 #endif // DEVELOPMENT || DEBUG
7271 #endif // CONFIG_MEMORYSTATUS
7272
7273 kern_return_t
task_convert_phys_footprint_limit(int limit_mb,int * converted_limit_mb)7274 task_convert_phys_footprint_limit(
7275 int limit_mb,
7276 int *converted_limit_mb)
7277 {
7278 if (limit_mb == -1) {
7279 /*
7280 * No limit
7281 */
7282 if (max_task_footprint != 0) {
7283 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
7284 } else {
7285 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
7286 }
7287 } else {
7288 /* nothing to convert */
7289 *converted_limit_mb = limit_mb;
7290 }
7291 return KERN_SUCCESS;
7292 }
7293
/*
 * Set the task's phys_footprint ledger limit (in MB) along with the
 * active/fatal attributes of that limit.
 *
 * new_limit_mb == -1 removes the per-task limit, restoring the global
 * default (max_task_footprint) or infinity.  If old_limit_mb is non-NULL
 * the previous limit (in MB) is returned through it.  On DEVELOPMENT/
 * DEBUG kernels the diagnostics threshold is disabled while it coincides
 * with the new limit and re-enabled when it no longer does.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
	task_t task,
	int new_limit_mb,
	int *old_limit_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal)
{
	ledger_amount_t old;
	kern_return_t ret;
#if DEVELOPMENT || DEBUG
	diagthreshold_check_return diag_threshold_validity;
#endif
	ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}
	/**
	 * Maybe we will need to re-enable the diag threshold, lets get the value
	 * and the current status
	 */
#if DEVELOPMENT || DEBUG
	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_mb, false);
	/**
	 * If the footprint and diagnostics threshold are going to be same, lets disable the threshold
	 */
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}
#endif

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

	if (old_limit_mb) {
		*old_limit_mb = (int)(old >> 20);
	}

	if (new_limit_mb == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
		    max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);

		task_lock(task);
		task_set_memlimit_is_active(task, memlimit_is_active);
		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
		task_unlock(task);
		/**
		 * If the diagnostics were disabled, and now we have a new limit, we have to re-enable it.
		 */
#if DEVELOPMENT || DEBUG
		if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
			ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
		} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
			ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
		}
#endif
		return KERN_SUCCESS;
	}

#ifdef CONFIG_NOMONITORS
	/* Monitors compiled out: report success without arming the limit. */
	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

	task_lock(task);

	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
	    (((ledger_amount_t)new_limit_mb << 20) == old)) {
		/*
		 * memlimit state is not changing
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	task_set_memlimit_is_active(task, memlimit_is_active);
	task_set_memlimit_is_fatal(task, memlimit_is_fatal);

	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

	if (task == current_task()) {
		/* Re-evaluate the current balance against the new limit. */
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);
#if DEVELOPMENT || DEBUG
	if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}
#endif

	return KERN_SUCCESS;
}
7400
7401 #if RESETTABLE_DIAG_FOOTPRINT_LIMITS
/*
 * Set the task's diagnostics memory threshold on the phys_footprint
 * ledger entry, in bytes.  new_limit_bytes == (uint64_t)-1 removes the
 * threshold (sets infinity and re-enables the mechanism).  The previous
 * threshold is returned through old_limit_bytes when non-NULL.
 */
kern_return_t
task_set_diag_footprint_limit_internal(
	task_t task,
	uint64_t new_limit_bytes,
	uint64_t *old_limit_bytes)
{
	ledger_amount_t old = 0;
	kern_return_t ret = KERN_SUCCESS;
	diagthreshold_check_return diag_threshold_validity;
	ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &old);

	if (ret != KERN_SUCCESS) {
		return ret;
	}
	/**
	 * Maybe we will need to re-enable the diag threshold, lets get the value
	 * and the current status
	 */
	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_bytes >> 20, true);
	/**
	 * If the footprint and diagnostics threshold are going to be same, lets disable the threshold
	 */
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	}

	/*
	 * Check that limit >> 20 will not give an "unexpected" 32-bit
	 * result. There are, however, implicit assumptions that -1 mb limit
	 * equates to LEDGER_LIMIT_INFINITY.
	 */
	if (old_limit_bytes) {
		*old_limit_bytes = old;
	}

	/* Note: -1 compares equal here after implicit conversion to uint64_t. */
	if (new_limit_bytes == -1) {
		/*
		 * Caller wishes to remove the limit.
		 */
		ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
		    LEDGER_LIMIT_INFINITY);
		/*
		 * If the memory diagnostics flag was disabled, lets enable it again
		 */
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
		return KERN_SUCCESS;
	}

#ifdef CONFIG_NOMONITORS
	return KERN_SUCCESS;
#else

	task_lock(task);
	ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
	    (ledger_amount_t)new_limit_bytes );
	if (task == current_task()) {
		/* Re-evaluate the current balance against the new threshold. */
		ledger_check_new_balance(current_thread(), task->ledger,
		    task_ledgers.phys_footprint);
	}

	task_unlock(task);
	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
		ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
		ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
	}

	return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */
}
7472
7473 kern_return_t
task_get_diag_footprint_limit_internal(task_t task,uint64_t * new_limit_bytes,bool * threshold_disabled)7474 task_get_diag_footprint_limit_internal(
7475 task_t task,
7476 uint64_t *new_limit_bytes,
7477 bool *threshold_disabled)
7478 {
7479 ledger_amount_t ledger_limit;
7480 kern_return_t ret = KERN_SUCCESS;
7481 if (new_limit_bytes == NULL || threshold_disabled == NULL) {
7482 return KERN_INVALID_ARGUMENT;
7483 }
7484 ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &ledger_limit);
7485 if (ledger_limit == LEDGER_LIMIT_INFINITY) {
7486 ledger_limit = -1;
7487 }
7488 if (ret == KERN_SUCCESS) {
7489 *new_limit_bytes = ledger_limit;
7490 ret = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, threshold_disabled);
7491 }
7492 return ret;
7493 }
7494 #endif /* RESETTABLE_DIAG_FOOTPRINT_LIMITS */
7495
7496
7497 kern_return_t
task_get_phys_footprint_limit(task_t task,int * limit_mb)7498 task_get_phys_footprint_limit(
7499 task_t task,
7500 int *limit_mb)
7501 {
7502 ledger_amount_t limit;
7503 kern_return_t ret;
7504
7505 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
7506 if (ret != KERN_SUCCESS) {
7507 return ret;
7508 }
7509
7510 /*
7511 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
7512 * result. There are, however, implicit assumptions that -1 mb limit
7513 * equates to LEDGER_LIMIT_INFINITY.
7514 */
7515 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
7516 *limit_mb = (int)(limit >> 20);
7517
7518 return KERN_SUCCESS;
7519 }
7520 #else /* CONFIG_MEMORYSTATUS */
/* Stub: footprint limits require CONFIG_MEMORYSTATUS; unsupported here. */
kern_return_t
task_set_phys_footprint_limit(
	__unused task_t task,
	__unused int new_limit_mb,
	__unused int *old_limit_mb)
{
	return KERN_FAILURE;
}
7529
/* Stub: footprint limits require CONFIG_MEMORYSTATUS; unsupported here. */
kern_return_t
task_get_phys_footprint_limit(
	__unused task_t task,
	__unused int *limit_mb)
{
	return KERN_FAILURE;
}
7537 #endif /* CONFIG_MEMORYSTATUS */
7538
/* Return a pointer to the task's security token (stored in read-only task data). */
security_token_t *
task_get_sec_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.sec_token;
}
7544
/* Replace the task's security token via the read-only-zone update path. */
void
task_set_sec_token(task_t task, security_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.sec_token, token);
}
7551
/* Return a pointer to the task's audit token (stored in read-only task data). */
audit_token_t *
task_get_audit_token(task_t task)
{
	return &task_get_ro(task)->task_tokens.audit_token;
}
7557
/* Replace the task's audit token via the read-only-zone update path. */
void
task_set_audit_token(task_t task, audit_token_t *token)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_tokens.audit_token, token);
}
7564
7565 void
task_set_tokens(task_t task,security_token_t * sec_token,audit_token_t * audit_token)7566 task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
7567 {
7568 struct task_token_ro_data tokens;
7569
7570 tokens = task_get_ro(task)->task_tokens;
7571 tokens.sec_token = *sec_token;
7572 tokens.audit_token = *audit_token;
7573
7574 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
7575 &tokens);
7576 }
7577
/* A task is privileged when the first word of its security token is zero. */
boolean_t
task_is_privileged(task_t task)
{
	return task_get_sec_token(task)->val[0] == 0;
}
7583
7584 #ifdef CONFIG_MACF
/* Return the task's Mach trap filter bitmask (read-only task data). */
uint8_t *
task_get_mach_trap_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_trap_filter_mask;
}
7590
/* Install a Mach trap filter bitmask via the read-only-zone update path. */
void
task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_trap_filter_mask, &mask);
}
7597
/* Return the task's Mach kobject filter bitmask (read-only task data). */
uint8_t *
task_get_mach_kobj_filter_mask(task_t task)
{
	return task_get_ro(task)->task_filters.mach_kobj_filter_mask;
}
7603
/* Return the all-image-info address registered on the task. */
mach_vm_address_t
task_get_all_image_info_addr(task_t task)
{
	return task->all_image_info_addr;
}
7609
/* Install a Mach kobject filter bitmask via the read-only-zone update path. */
void
task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_filters.mach_kobj_filter_mask, &mask);
}
7616
7617 #endif /* CONFIG_MACF */
7618
7619 void
task_set_thread_limit(task_t task,uint16_t thread_limit)7620 task_set_thread_limit(task_t task, uint16_t thread_limit)
7621 {
7622 assert(task != kernel_task);
7623 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7624 task_lock(task);
7625 task->task_thread_limit = thread_limit;
7626 task_unlock(task);
7627 }
7628 }
7629
7630 #if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Apply soft/hard limits to the task's IPC port-name table size;
 * enforcement is owned by the ipc_space layer.
 */
kern_return_t
task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
{
	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
}
7636 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
7637
7638 #if XNU_TARGET_OS_OSX
7639 boolean_t
task_has_system_version_compat_enabled(task_t task)7640 task_has_system_version_compat_enabled(task_t task)
7641 {
7642 boolean_t enabled = FALSE;
7643
7644 task_lock(task);
7645 enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
7646 task_unlock(task);
7647
7648 return enabled;
7649 }
7650
/*
 * Set or clear TF_SYS_VERSION_COMPAT on the current task, under the
 * task lock.  Only valid for the caller's own (non-kernel) task.
 */
void
task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
{
	assert(task == current_task());
	assert(task != kernel_task);

	task_lock(task);
	if (enable_system_version_compat) {
		task->t_flags |= TF_SYS_VERSION_COMPAT;
	} else {
		task->t_flags &= ~TF_SYS_VERSION_COMPAT;
	}
	task_unlock(task);
}
7665 #endif /* XNU_TARGET_OS_OSX */
7666
7667 /*
7668 * We need to export some functions to other components that
7669 * are currently implemented in macros within the osfmk
7670 * component. Just export them as functions of the same name.
7671 */
7672 boolean_t
is_kerneltask(task_t t)7673 is_kerneltask(task_t t)
7674 {
7675 if (t == kernel_task) {
7676 return TRUE;
7677 }
7678
7679 return FALSE;
7680 }
7681
7682 boolean_t
is_corpsefork(task_t t)7683 is_corpsefork(task_t t)
7684 {
7685 return task_is_a_corpse_fork(t);
7686 }
7687
7688 task_t
current_task_early(void)7689 current_task_early(void)
7690 {
7691 if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
7692 if (current_thread()->t_tro == NULL) {
7693 return TASK_NULL;
7694 }
7695 }
7696 return get_threadtask(current_thread());
7697 }
7698
7699 task_t
current_task(void)7700 current_task(void)
7701 {
7702 return get_threadtask(current_thread());
7703 }
7704
7705 /* defined in bsd/kern/kern_prot.c */
7706 extern int get_audit_token_pid(audit_token_t *audit_token);
7707
7708 int
task_pid(task_t task)7709 task_pid(task_t task)
7710 {
7711 if (task) {
7712 return get_audit_token_pid(task_get_audit_token(task));
7713 }
7714 return -1;
7715 }
7716
7717 #if __has_feature(ptrauth_calls)
7718 /*
7719 * Get the shared region id and jop signing key for the task.
7720 * The function will allocate a kalloc buffer and return
7721 * it to caller, the caller needs to free it. This is used
7722 * for getting the information via task port.
7723 */
char *
task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
{
	size_t len;
	char *shared_region_id = NULL;

	/* Snapshot the id length under the lock; bail out if no id is set. */
	task_lock(task);
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		return NULL;
	}
	len = strlen(task->shared_region_id) + 1;

	/* don't hold task lock while allocating */
	task_unlock(task);
	shared_region_id = kalloc_data(len, Z_WAITOK);
	task_lock(task);

	/* Re-check: the id may have been cleared while the lock was dropped. */
	if (task->shared_region_id == NULL) {
		task_unlock(task);
		kfree_data(shared_region_id, len);
		return NULL;
	}
	assert(len == strlen(task->shared_region_id) + 1); /* should never change */
	strlcpy(shared_region_id, task->shared_region_id, len);
	task_unlock(task);

	/* find key from its auth pager */
	if (jop_pid != NULL) {
		*jop_pid = shared_region_find_key(shared_region_id);
	}

	return shared_region_id;
}
7758
7759 /*
7760 * set the shared region id for a task
7761 */
7762 void
task_set_shared_region_id(task_t task,char * id)7763 task_set_shared_region_id(task_t task, char *id)
7764 {
7765 char *old_id;
7766
7767 task_lock(task);
7768 old_id = task->shared_region_id;
7769 task->shared_region_id = id;
7770 task->shared_region_auth_remapped = FALSE;
7771 task_unlock(task);
7772
7773 /* free any pre-existing shared region id */
7774 if (old_id != NULL) {
7775 shared_region_key_dealloc(old_id);
7776 kfree_data(old_id, strlen(old_id) + 1);
7777 }
7778 }
7779 #endif /* __has_feature(ptrauth_calls) */
7780
7781 /*
7782 * This routine finds a thread in a task by its unique id
7783 * Returns a referenced thread or THREAD_NULL if the thread was not found
7784 *
7785 * TODO: This is super inefficient - it's an O(threads in task) list walk!
7786 * We should make a tid hash, or transition all tid clients to thread ports
7787 *
7788 * Precondition: No locks held (will take task lock)
7789 */
7790 thread_t
task_findtid(task_t task,uint64_t tid)7791 task_findtid(task_t task, uint64_t tid)
7792 {
7793 thread_t self = current_thread();
7794 thread_t found_thread = THREAD_NULL;
7795 thread_t iter_thread = THREAD_NULL;
7796
7797 /* Short-circuit the lookup if we're looking up ourselves */
7798 if (tid == self->thread_id || tid == TID_NULL) {
7799 assert(get_threadtask(self) == task);
7800
7801 thread_reference(self);
7802
7803 return self;
7804 }
7805
7806 task_lock(task);
7807
7808 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
7809 if (iter_thread->thread_id == tid) {
7810 found_thread = iter_thread;
7811 thread_reference(found_thread);
7812 break;
7813 }
7814 }
7815
7816 task_unlock(task);
7817
7818 return found_thread;
7819 }
7820
7821 int
pid_from_task(task_t task)7822 pid_from_task(task_t task)
7823 {
7824 int pid = -1;
7825 void *bsd_info = get_bsdtask_info(task);
7826
7827 if (bsd_info) {
7828 pid = proc_pid(bsd_info);
7829 } else {
7830 pid = task_pid(task);
7831 }
7832
7833 return pid;
7834 }
7835
7836 /*
7837 * Control the CPU usage monitor for a task.
7838 */
7839 kern_return_t
task_cpu_usage_monitor_ctl(task_t task,uint32_t * flags)7840 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7841 {
7842 int error = KERN_SUCCESS;
7843
7844 if (*flags & CPUMON_MAKE_FATAL) {
7845 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7846 } else {
7847 error = KERN_INVALID_ARGUMENT;
7848 }
7849
7850 return error;
7851 }
7852
7853 /*
7854 * Control the wakeups monitor for a task.
7855 */
kern_return_t
task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
{
	ledger_t ledger = task->ledger;

	task_lock(task);
	if (*flags & WAKEMON_GET_PARAMS) {
		ledger_amount_t limit;
		uint64_t period;

		/* Read back current monitor configuration from the ledger. */
		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);

		if (limit != LEDGER_LIMIT_INFINITY) {
			/*
			 * An active limit means the wakeups monitor is enabled.
			 */
			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
			*flags = WAKEMON_ENABLE;
			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
				*flags |= WAKEMON_MAKE_FATAL;
			}
		} else {
			*flags = WAKEMON_DISABLE;
			*rate_hz = -1;
		}

		/*
		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
		 */
		task_unlock(task);
		return KERN_SUCCESS;
	}

	if (*flags & WAKEMON_ENABLE) {
		if (*flags & WAKEMON_SET_DEFAULTS) {
			*rate_hz = task_wakeups_monitor_rate;
		}

#ifndef CONFIG_NOMONITORS
		/*
		 * NOTE(review): the fatal bit is set before *rate_hz is
		 * validated below, so an invalid rate still leaves the bit
		 * set — confirm this ordering is intentional.
		 */
		if (*flags & WAKEMON_MAKE_FATAL) {
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
		}
#endif /* CONFIG_NOMONITORS */

		if (*rate_hz <= 0) {
			task_unlock(task);
			return KERN_INVALID_ARGUMENT;
		}

#ifndef CONFIG_NOMONITORS
		/* Arm the ledger: limit = rate * interval, refilled every interval. */
		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
		    (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
#endif /* CONFIG_NOMONITORS */
	} else if (*flags & WAKEMON_DISABLE) {
		/*
		 * Caller wishes to disable wakeups monitor on the task.
		 *
		 * Disable telemetry if it was triggered by the wakeups monitor, and
		 * remove the limit & callback on the wakeups ledger entry.
		 */
#if CONFIG_TELEMETRY
		telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
#endif
		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
	}

	task_unlock(task);
	return KERN_SUCCESS;
}
7929
7930 void
task_wakeups_rate_exceeded(int warning,__unused const void * param0,__unused const void * param1)7931 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
7932 {
7933 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7934 #if CONFIG_TELEMETRY
7935 /*
7936 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
7937 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
7938 */
7939 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
7940 #endif
7941 return;
7942 }
7943
7944 #if CONFIG_TELEMETRY
7945 /*
7946 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
7947 * exceeded the limit, turn telemetry off for the task.
7948 */
7949 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
7950 #endif
7951
7952 if (warning == 0) {
7953 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
7954 }
7955 }
7956
7957 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)7958 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
7959 {
7960 task_t task = current_task();
7961 int pid = 0;
7962 const char *procname = "unknown";
7963 boolean_t fatal;
7964 kern_return_t kr;
7965 #ifdef EXC_RESOURCE_MONITORS
7966 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
7967 #endif /* EXC_RESOURCE_MONITORS */
7968 struct ledger_entry_info lei;
7969
7970 #ifdef MACH_BSD
7971 pid = proc_selfpid();
7972 if (get_bsdtask_info(task) != NULL) {
7973 procname = proc_name_address(get_bsdtask_info(current_task()));
7974 }
7975 #endif
7976
7977 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
7978
7979 /*
7980 * Disable the exception notification so we don't overwhelm
7981 * the listener with an endless stream of redundant exceptions.
7982 * TODO: detect whether another thread is already reporting the violation.
7983 */
7984 uint32_t flags = WAKEMON_DISABLE;
7985 task_wakeups_monitor_ctl(task, &flags, NULL);
7986
7987 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7988 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
7989 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
7990 "over ~%llu seconds, averaging %llu wakes / second and "
7991 "violating a %slimit of %llu wakes over %llu seconds.\n",
7992 procname, pid,
7993 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
7994 lei.lei_last_refill == 0 ? 0 :
7995 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
7996 fatal ? "FATAL " : "",
7997 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
7998
7999 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
8000 fatal ? kRNFatalLimitFlag : 0);
8001 if (kr) {
8002 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
8003 }
8004
8005 #ifdef EXC_RESOURCE_MONITORS
8006 if (disable_exc_resource) {
8007 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8008 "suppressed by a boot-arg\n", procname, pid);
8009 return;
8010 }
8011 if (disable_exc_resource_during_audio && audio_active) {
8012 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8013 "suppressed due to audio playback\n", procname, pid);
8014 return;
8015 }
8016 if (lei.lei_last_refill == 0) {
8017 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8018 "suppressed due to lei.lei_last_refill = 0 \n", procname, pid);
8019 }
8020
8021 code[0] = code[1] = 0;
8022 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
8023 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
8024 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
8025 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
8026 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
8027 lei.lei_last_refill);
8028 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
8029 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
8030 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8031 #endif /* EXC_RESOURCE_MONITORS */
8032
8033 if (fatal) {
8034 task_terminate_internal(task);
8035 }
8036 }
8037
8038 static boolean_t
global_update_logical_writes(int64_t io_delta,int64_t * global_write_count)8039 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
8040 {
8041 int64_t old_count, new_count;
8042 boolean_t needs_telemetry;
8043
8044 do {
8045 new_count = old_count = *global_write_count;
8046 new_count += io_delta;
8047 if (new_count >= io_telemetry_limit) {
8048 new_count = 0;
8049 needs_telemetry = TRUE;
8050 } else {
8051 needs_telemetry = FALSE;
8052 }
8053 } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
8054 return needs_telemetry;
8055 }
8056
void
task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
{
#if CONFIG_PHYS_WRITE_ACCT
	/* Account io_size bytes of physical (metadata) writes to the task. */
	if (!io_size) {
		return;
	}

	/*
	 * task == NULL means that we have to update kernel_task ledgers
	 */
	if (!task) {
		task = kernel_task;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
	    task_pid(task), flavor, io_size, flags, 0);
	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);

	/* Keep the raw counter and the ledger entry in lock-step. */
	if (flags & TASK_BALANCE_CREDIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	} else if (flags & TASK_BALANCE_DEBIT) {
		if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
			OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
			ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
		}
	}
#endif /* CONFIG_PHYS_WRITE_ACCT */
}
8089
void
task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
{
	int64_t io_delta = 0;
	int64_t * global_counter_to_update;
	boolean_t needs_telemetry = FALSE;
	boolean_t is_external_device = FALSE;
	int ledger_to_update = 0;
	struct task_writes_counters * writes_counters_to_update;

	if ((!task) || (!io_size) || (!vp)) {
		return;
	}

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
	    task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
	DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);

	// Is the drive backing this vnode internal or external to the system?
	/* Internal and external storage are accounted in separate counters/ledgers. */
	if (vnode_isonexternalstorage(vp) == false) {
		global_counter_to_update = &global_logical_writes_count;
		ledger_to_update = task_ledgers.logical_writes;
		writes_counters_to_update = &task->task_writes_counters_internal;
		is_external_device = FALSE;
	} else {
		global_counter_to_update = &global_logical_writes_to_external_count;
		ledger_to_update = task_ledgers.logical_writes_to_external;
		writes_counters_to_update = &task->task_writes_counters_external;
		is_external_device = TRUE;
	}

	/*
	 * Credit the ledger for immediate/deferred/metadata writes; debit it
	 * for invalidated writes. Coalition I/O is only tracked for internal
	 * storage.
	 */
	switch (flags) {
	case TASK_WRITE_IMMEDIATE:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	case TASK_WRITE_DEFERRED:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	case TASK_WRITE_INVALIDATED:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
		ledger_debit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
		}
		break;
	case TASK_WRITE_METADATA:
		OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
		ledger_credit(task->ledger, ledger_to_update, io_size);
		if (!is_external_device) {
			coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
		}
		break;
	}

	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
	if (io_telemetry_limit != 0) {
		/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
		needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
		if (needs_telemetry && !is_external_device) {
			act_set_io_telemetry_ast(current_thread());
		}
	}
}
8161
8162 /*
8163 * Control the I/O monitor for a task.
8164 */
8165 kern_return_t
task_io_monitor_ctl(task_t task,uint32_t * flags)8166 task_io_monitor_ctl(task_t task, uint32_t *flags)
8167 {
8168 ledger_t ledger = task->ledger;
8169
8170 task_lock(task);
8171 if (*flags & IOMON_ENABLE) {
8172 /* Configure the physical I/O ledger */
8173 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
8174 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
8175 } else if (*flags & IOMON_DISABLE) {
8176 /*
8177 * Caller wishes to disable I/O monitor on the task.
8178 */
8179 ledger_disable_refill(ledger, task_ledgers.physical_writes);
8180 ledger_disable_callback(ledger, task_ledgers.physical_writes);
8181 }
8182
8183 task_unlock(task);
8184 return KERN_SUCCESS;
8185 }
8186
8187 void
task_io_rate_exceeded(int warning,const void * param0,__unused const void * param1)8188 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
8189 {
8190 if (warning == 0) {
8191 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
8192 }
8193 }
8194
/*
 * Report that the current task exceeded its I/O limit: log, notify the
 * resource-violation listener, and raise EXC_RESOURCE where configured.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
{
	int pid = 0;
	task_t task = current_task();
#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info lei = {};
	kern_return_t kr;

#ifdef MACH_BSD
	pid = proc_selfpid();
#endif
	/*
	 * Get the ledger entry info. We need to do this before disabling the exception
	 * to get correct values for all fields.
	 */
	/* NOTE(review): only the physical-writes flavor fills lei here; for
	 * other flavors lei stays zeroed — confirm that is intended. */
	switch (flavor) {
	case FLAVOR_IO_PHYSICAL_WRITES:
		ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
		break;
	}


	/*
	 * Disable the exception notification so we don't overwhelm
	 * the listener with an endless stream of redundant exceptions.
	 * TODO: detect whether another thread is already reporting the violation.
	 */
	uint32_t flags = IOMON_DISABLE;
	task_io_monitor_ctl(task, &flags);

	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
		trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
	}
	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
	    pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));

	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
	if (kr) {
		printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	/* Pack interval/limit/observed (MB, seconds) into the exception codes. */
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
	EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
#endif /* EXC_RESOURCE_MONITORS */
}
8249
8250 void
task_port_space_ast(__unused task_t task)8251 task_port_space_ast(__unused task_t task)
8252 {
8253 uint32_t current_size, soft_limit, hard_limit;
8254 assert(task == current_task());
8255 kern_return_t ret = ipc_space_get_table_size_and_limits(task->itk_space,
8256 ¤t_size, &soft_limit, &hard_limit);
8257 if (ret == KERN_SUCCESS) {
8258 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
8259 }
8260 }
8261
8262 #if CONFIG_PROC_RESOURCE_LIMITS
8263 static mach_port_t
task_allocate_fatal_port(void)8264 task_allocate_fatal_port(void)
8265 {
8266 mach_port_t task_fatal_port = MACH_PORT_NULL;
8267 task_id_token_t token;
8268
8269 kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
8270 if (kr) {
8271 return MACH_PORT_NULL;
8272 }
8273 task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
8274 IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
8275
8276 task_id_token_set_port(token, task_fatal_port);
8277
8278 return task_fatal_port;
8279 }
8280
8281 static void
task_fatal_port_no_senders(ipc_port_t port,__unused mach_port_mscount_t mscount)8282 task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
8283 {
8284 task_t task = TASK_NULL;
8285 kern_return_t kr;
8286
8287 task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);
8288
8289 assert(token != NULL);
8290 if (token) {
8291 kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
8292 if (task) {
8293 task_bsdtask_kill(task);
8294 task_deallocate(task);
8295 }
8296 task_id_token_release(token); /* consumes ref given by notification */
8297 }
8298 }
8299 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8300
/*
 * Notify (or kill) a task whose port table crossed its soft/hard limit.
 * A zero soft AND hard limit means the system-wide table cap was hit,
 * which is always fatal via EXC_RESOURCE.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
{
	int pid = 0;
	char *procname = (char *) "unknown";
	__unused kern_return_t kr;
	__unused resource_notify_flags_t flags = kRNFlagsNone;
	__unused uint32_t limit;
	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (get_bsdtask_info(task) != NULL) {
		procname = proc_name_address(get_bsdtask_info(task));
	}
#endif
	/*
	 * Only kernel_task and launchd may be allowed to
	 * have really large ipc space.
	 */
	if (pid == 0 || pid == 1) {
		return;
	}

	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
	    Num of ports allocated %u; \n", procname, pid, current_size);

	/* Abort the process if it has hit the system-wide limit for ipc port table size */
	if (!hard_limit && !soft_limit) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
		EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
		EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);

		exit_with_port_space_exception(current_proc(), code[0], code[1]);

		return;
	}

#if CONFIG_PROC_RESOURCE_LIMITS
	/*
	 * Hard-limit violations carry a "fatal" port the listener can use to
	 * kill the task; soft-limit violations are advisory only.
	 */
	if (hard_limit > 0) {
		flags |= kRNHardLimitFlag;
		limit = hard_limit;
		task_fatal_port = task_allocate_fatal_port();
		if (!task_fatal_port) {
			os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
			task_bsdtask_kill(task);
		}
	} else {
		flags |= kRNSoftLimitFlag;
		limit = soft_limit;
	}

	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
	if (kr) {
		os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
	}
	/* Drop the send right we minted; the notification holds its own. */
	if (task_fatal_port) {
		ipc_port_release_send(task_fatal_port);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
8364
/*
 * AST handler fired when the task's file-descriptor count crosses a
 * configured threshold; fans out to the fd-limit notification. No-op
 * unless CONFIG_PROC_RESOURCE_LIMITS is built in.
 */
void
task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
{
#if CONFIG_PROC_RESOURCE_LIMITS
	assert(task == current_task());
	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
8373
8374 #if CONFIG_PROC_RESOURCE_LIMITS
8375 void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task,int current_size,int soft_limit,int hard_limit)8376 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
8377 {
8378 int pid = 0;
8379 char *procname = (char *) "unknown";
8380 kern_return_t kr;
8381 resource_notify_flags_t flags = kRNFlagsNone;
8382 int limit;
8383 mach_port_t task_fatal_port = MACH_PORT_NULL;
8384
8385 #ifdef MACH_BSD
8386 pid = proc_selfpid();
8387 if (get_bsdtask_info(task) != NULL) {
8388 procname = proc_name_address(get_bsdtask_info(task));
8389 }
8390 #endif
8391 /*
8392 * Only kernel_task and launchd may be allowed to
8393 * have really large ipc space.
8394 */
8395 if (pid == 0 || pid == 1) {
8396 return;
8397 }
8398
8399 os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
8400 Num of fds allocated %u; \n", procname, pid, current_size);
8401
8402 if (hard_limit > 0) {
8403 flags |= kRNHardLimitFlag;
8404 limit = hard_limit;
8405 task_fatal_port = task_allocate_fatal_port();
8406 if (!task_fatal_port) {
8407 os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8408 task_bsdtask_kill(task);
8409 }
8410 } else {
8411 flags |= kRNSoftLimitFlag;
8412 limit = soft_limit;
8413 }
8414
8415 kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8416 if (kr) {
8417 os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
8418 }
8419 if (task_fatal_port) {
8420 ipc_port_release_send(task_fatal_port);
8421 }
8422 }
8423 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
8424
8425 /* Placeholders for the task set/get voucher interfaces */
8426 kern_return_t
task_get_mach_voucher(task_t task,mach_voucher_selector_t __unused which,ipc_voucher_t * voucher)8427 task_get_mach_voucher(
8428 task_t task,
8429 mach_voucher_selector_t __unused which,
8430 ipc_voucher_t *voucher)
8431 {
8432 if (TASK_NULL == task) {
8433 return KERN_INVALID_TASK;
8434 }
8435
8436 *voucher = NULL;
8437 return KERN_SUCCESS;
8438 }
8439
8440 kern_return_t
task_set_mach_voucher(task_t task,ipc_voucher_t __unused voucher)8441 task_set_mach_voucher(
8442 task_t task,
8443 ipc_voucher_t __unused voucher)
8444 {
8445 if (TASK_NULL == task) {
8446 return KERN_INVALID_TASK;
8447 }
8448
8449 return KERN_SUCCESS;
8450 }
8451
/* Placeholder: voucher swap is unsupported; always KERN_NOT_SUPPORTED. */
kern_return_t
task_swap_mach_voucher(
	__unused task_t task,
	__unused ipc_voucher_t new_voucher,
	ipc_voucher_t *in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
}
8467
8468 void
task_set_gpu_denied(task_t task,boolean_t denied)8469 task_set_gpu_denied(task_t task, boolean_t denied)
8470 {
8471 task_lock(task);
8472
8473 if (denied) {
8474 task->t_flags |= TF_GPU_DENIED;
8475 } else {
8476 task->t_flags &= ~TF_GPU_DENIED;
8477 }
8478
8479 task_unlock(task);
8480 }
8481
8482 boolean_t
task_is_gpu_denied(task_t task)8483 task_is_gpu_denied(task_t task)
8484 {
8485 /* We don't need the lock to read this flag */
8486 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
8487 }
8488
8489 /*
8490 * Task policy termination uses this path to clear the bit the final time
8491 * during the termination flow, and the TASK_POLICY_TERMINATED bit guarantees
8492 * that it won't be changed again on a terminated task.
8493 */
8494 bool
task_set_game_mode_locked(task_t task,bool enabled)8495 task_set_game_mode_locked(task_t task, bool enabled)
8496 {
8497 task_lock_assert_owned(task);
8498
8499 if (enabled) {
8500 assert(proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0);
8501 }
8502
8503 bool previously_enabled = task_get_game_mode(task);
8504 bool needs_update = false;
8505 uint32_t new_count = 0;
8506
8507 if (enabled) {
8508 task->t_flags |= TF_GAME_MODE;
8509 } else {
8510 task->t_flags &= ~TF_GAME_MODE;
8511 }
8512
8513 if (enabled && !previously_enabled) {
8514 if (task_coalition_adjust_game_mode_count(task, 1, &new_count) && (new_count == 1)) {
8515 needs_update = true;
8516 }
8517 } else if (!enabled && previously_enabled) {
8518 if (task_coalition_adjust_game_mode_count(task, -1, &new_count) && (new_count == 0)) {
8519 needs_update = true;
8520 }
8521 }
8522
8523 return needs_update;
8524 }
8525
8526 void
task_set_game_mode(task_t task,bool enabled)8527 task_set_game_mode(task_t task, bool enabled)
8528 {
8529 bool needs_update = false;
8530
8531 task_lock(task);
8532
8533 /* After termination, further updates are no longer effective */
8534 if (proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0) {
8535 needs_update = task_set_game_mode_locked(task, enabled);
8536 }
8537
8538 task_unlock(task);
8539
8540 #if CONFIG_THREAD_GROUPS
8541 if (needs_update) {
8542 task_coalition_thread_group_game_mode_update(task);
8543 }
8544 #endif /* CONFIG_THREAD_GROUPS */
8545 }
8546
8547 bool
task_get_game_mode(task_t task)8548 task_get_game_mode(task_t task)
8549 {
8550 /* We don't need the lock to read this flag */
8551 return task->t_flags & TF_GAME_MODE;
8552 }
8553
8554
8555 uint64_t
get_task_memory_region_count(task_t task)8556 get_task_memory_region_count(task_t task)
8557 {
8558 vm_map_t map;
8559 map = (task == kernel_task) ? kernel_map: task->map;
8560 return (uint64_t)get_map_nentries(map);
8561 }
8562
/*
 * Emit the kdebug events describing one dyld image (UUID, load address,
 * fsid, fsobjid). The payload is packed differently for LP64 vs ILP32
 * kernels, using distinct event sub-codes so decoders can tell them apart.
 */
static void
kdebug_trace_dyld_internal(uint32_t base_code,
    struct dyld_kernel_image_info *info)
{
	static_assert(sizeof(info->uuid) >= 16);

#if defined(__LP64__)
	uint64_t *uuid = (uint64_t *)&(info->uuid);

	/* Two events: UUID halves + load address + fsid, then fsobjid. */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
	    uuid[1], info->load_addr,
	    (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
	    0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
	    (uint64_t)info->fsobjid.fid_objno |
	    ((uint64_t)info->fsobjid.fid_generation << 32),
	    0, 0, 0, 0);
#else /* defined(__LP64__) */
	uint32_t *uuid = (uint32_t *)&(info->uuid);

	/* Three events: the UUID, then load address + fsid + objno, then generation. */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
	    uuid[1], uuid[2], uuid[3], 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
	    (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
	    info->fsobjid.fid_objno, 0);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
	    info->fsobjid.fid_generation, 0, 0, 0, 0);
#endif /* !defined(__LP64__) */
}
8597
/*
 * Validate and trace a user-supplied array of dyld image infos.
 *
 * Ownership of infos_copy: on KERN_SUCCESS this routine has consumed
 * the vm_map_copy (discarded, or copied out and deallocated); on error
 * the copy is left for the MIG layer to destroy.
 */
static kern_return_t
kdebug_trace_dyld(task_t task, uint32_t base_code,
    vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
{
	kern_return_t kr;
	dyld_kernel_image_info_array_t infos;
	vm_map_offset_t map_data;
	vm_offset_t data;

	if (!infos_copy) {
		return KERN_INVALID_ADDRESS;
	}

	/* Fast path: tracing disabled — consume the copy and succeed. */
	if (!kdebug_enable ||
	    !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
		vm_map_copy_discard(infos_copy);
		return KERN_SUCCESS;
	}

	/* Only the calling task may trace its own images. */
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	/* Map the caller's data into the IPC kernel map so we can read it. */
	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);

	for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
		kdebug_trace_dyld_internal(base_code, &(infos[i]));
	}

	/* Release the kernel mapping created by vm_map_copyout(). */
	data = CAST_DOWN(vm_offset_t, map_data);
	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
	return KERN_SUCCESS;
}
8636
/*
 * MIG entry: emit kdebug "map" events for an array of dyld images.
 * infos_copy is a vm_map_copy consumed on success (see kdebug_trace_dyld).
 */
kern_return_t
task_register_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8645
/*
 * MIG entry: emit kdebug "unmap" events for an array of dyld images.
 * infos_copy is a vm_map_copy consumed on success (see kdebug_trace_dyld).
 */
kern_return_t
task_unregister_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
	           (vm_map_copy_t)infos_copy, infos_len);
}
8654
/* Unimplemented MIG entry: querying dyld image infos is not supported. */
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
	return KERN_NOT_SUPPORTED;
}
8662
/*
 * MIG entry: trace the dyld shared-cache image for the calling task.
 * Only the current task may register its own shared cache info.
 */
kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,
    dyld_kernel_image_info_t cache_img,
    __unused boolean_t no_cache,
    __unused boolean_t private_cache)
{
	if (task == NULL || task != current_task()) {
		return KERN_INVALID_TASK;
	}

	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
	return KERN_SUCCESS;
}
8676
/* Unimplemented MIG entry: setting dyld state is not supported. */
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
	return KERN_NOT_SUPPORTED;
}
8683
/* Unimplemented MIG entry: fetching dyld process state is not supported. */
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
	return KERN_NOT_SUPPORTED;
}
8690
/*
 * Return performance-counter summaries for a task.
 *
 * flavor:      TASK_INSPECT_BASIC_COUNTS fills a
 *              struct task_inspect_basic_counts (lifetime instruction
 *              and cycle counts summed via recount).
 * size_in_out: on input, the caller's buffer size; on success, updated
 *              to the amount actually written.
 *
 * Only available when CONFIG_PERVASIVE_CPI is built in; otherwise
 * returns KERN_NOT_SUPPORTED.
 */
kern_return_t
task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
    task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
{
#if CONFIG_PERVASIVE_CPI
	task_t task = (task_t)task_insp;
	kern_return_t kr = KERN_SUCCESS;
	mach_msg_type_number_t size;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	size = *size_in_out;

	switch (flavor) {
	case TASK_INSPECT_BASIC_COUNTS: {
		struct task_inspect_basic_counts *bc =
		    (struct task_inspect_basic_counts *)info_out;
		struct recount_usage stats = { 0 };
		/* Reject buffers too small to hold this flavor. */
		if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

		recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
		bc->instructions = stats.ru_instructions;
		bc->cycles = stats.ru_cycles;
		size = TASK_INSPECT_BASIC_COUNTS_COUNT;
		break;
	}
	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	/* Only report the written size back to the caller on success. */
	if (kr == KERN_SUCCESS) {
		*size_in_out = size;
	}
	return kr;
#else /* CONFIG_PERVASIVE_CPI */
#pragma unused(task_insp, flavor, info_out, size_in_out)
	return KERN_NOT_SUPPORTED;
#endif /* !CONFIG_PERVASIVE_CPI */
}
8736
#if CONFIG_SECLUDED_MEMORY
/*
 * Count of tasks currently granted use of secluded memory; updated
 * atomically in task_set_can_use_secluded_mem_locked().
 */
int num_tasks_can_use_secluded_mem = 0;
8739
/*
 * Enable or disable a task's use of secluded memory, taking the task
 * lock.  A task that was never made eligible
 * (task_could_use_secluded_mem unset) is left untouched.
 */
void
task_set_can_use_secluded_mem(
	task_t          task,
	boolean_t       can_use_secluded_mem)
{
	if (!task->task_could_use_secluded_mem) {
		return;
	}
	task_lock(task);
	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
	task_unlock(task);
}
8752
8753 void
task_set_can_use_secluded_mem_locked(task_t task,boolean_t can_use_secluded_mem)8754 task_set_can_use_secluded_mem_locked(
8755 task_t task,
8756 boolean_t can_use_secluded_mem)
8757 {
8758 assert(task->task_could_use_secluded_mem);
8759 if (can_use_secluded_mem &&
8760 secluded_for_apps && /* global boot-arg */
8761 !task->task_can_use_secluded_mem) {
8762 assert(num_tasks_can_use_secluded_mem >= 0);
8763 OSAddAtomic(+1,
8764 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8765 task->task_can_use_secluded_mem = TRUE;
8766 } else if (!can_use_secluded_mem &&
8767 task->task_can_use_secluded_mem) {
8768 assert(num_tasks_can_use_secluded_mem > 0);
8769 OSAddAtomic(-1,
8770 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8771 task->task_can_use_secluded_mem = FALSE;
8772 }
8773 }
8774
/* Mark the task as eligible (or not) to ever use secluded memory. */
void
task_set_could_use_secluded_mem(
	task_t          task,
	boolean_t       could_use_secluded_mem)
{
	task->task_could_use_secluded_mem = !!could_use_secluded_mem;
}
8782
/*
 * Mark the task as allowed to piggyback on secluded memory whenever any
 * other task currently holds a secluded-memory grant (see
 * task_can_use_secluded_mem()).
 */
void
task_set_could_also_use_secluded_mem(
	task_t          task,
	boolean_t       could_also_use_secluded_mem)
{
	task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
}
8790
8791 boolean_t
task_can_use_secluded_mem(task_t task,boolean_t is_alloc)8792 task_can_use_secluded_mem(
8793 task_t task,
8794 boolean_t is_alloc)
8795 {
8796 if (task->task_can_use_secluded_mem) {
8797 assert(task->task_could_use_secluded_mem);
8798 assert(num_tasks_can_use_secluded_mem > 0);
8799 return TRUE;
8800 }
8801 if (task->task_could_also_use_secluded_mem &&
8802 num_tasks_can_use_secluded_mem > 0) {
8803 assert(num_tasks_can_use_secluded_mem > 0);
8804 return TRUE;
8805 }
8806
8807 /*
8808 * If a single task is using more than some large amount of
8809 * memory (i.e. secluded_shutoff_trigger) and is approaching
8810 * its task limit, allow it to dip into secluded and begin
8811 * suppression of rebuilding secluded memory until that task exits.
8812 */
8813 if (is_alloc && secluded_shutoff_trigger != 0) {
8814 uint64_t phys_used = get_task_phys_footprint(task);
8815 uint64_t limit = get_task_phys_footprint_limit(task);
8816 if (phys_used > secluded_shutoff_trigger &&
8817 limit > secluded_shutoff_trigger &&
8818 phys_used > limit - secluded_shutoff_headroom) {
8819 start_secluded_suppression(task);
8820 return TRUE;
8821 }
8822 }
8823
8824 return FALSE;
8825 }
8826
/* Is the task eligible to ever use secluded memory? */
boolean_t
task_could_use_secluded_mem(
	task_t  task)
{
	return task->task_could_use_secluded_mem;
}
8833
/* May the task piggyback on other tasks' secluded-memory grants? */
boolean_t
task_could_also_use_secluded_mem(
	task_t  task)
{
	return task->task_could_also_use_secluded_mem;
}
8840 #endif /* CONFIG_SECLUDED_MEMORY */
8841
/* Return the head of the task's IOKit user-client queue. */
queue_head_t *
task_io_user_clients(task_t task)
{
	return &task->io_user_clients;
}
8847
/*
 * Control whether the task wants app-suspended notification messages.
 * The flag is written without taking the task lock.
 */
void
task_set_message_app_suspended(task_t task, boolean_t enable)
{
	task->message_app_suspended = enable;
}
8853
/*
 * Copy the task fields that survive exec from the old task to the new
 * one.  Currently only `vtimers` is carried over.
 */
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
	dst_task->vtimers = src_task->vtimers;
}
8859
#if DEVELOPMENT || DEBUG
/*
 * Non-zero: system-wide override making every task report
 * footprint-style VM regions (see task_self_region_footprint()).
 */
int vm_region_footprint = 0;
#endif /* DEVELOPMENT || DEBUG */
8863
/*
 * Should VM region queries by the current task report footprint-style
 * accounting?  Honors the DEVELOPMENT/DEBUG system-wide override before
 * consulting the per-task flag.
 */
boolean_t
task_self_region_footprint(void)
{
#if DEVELOPMENT || DEBUG
	if (vm_region_footprint) {
		/* system-wide override */
		return TRUE;
	}
#endif /* DEVELOPMENT || DEBUG */
	return current_task()->task_region_footprint;
}
8875
8876 void
task_self_region_footprint_set(boolean_t newval)8877 task_self_region_footprint_set(
8878 boolean_t newval)
8879 {
8880 task_t curtask;
8881
8882 curtask = current_task();
8883 task_lock(curtask);
8884 if (newval) {
8885 curtask->task_region_footprint = TRUE;
8886 } else {
8887 curtask->task_region_footprint = FALSE;
8888 }
8889 task_unlock(curtask);
8890 }
8891
8892 void
task_set_darkwake_mode(task_t task,boolean_t set_mode)8893 task_set_darkwake_mode(task_t task, boolean_t set_mode)
8894 {
8895 assert(task);
8896
8897 task_lock(task);
8898
8899 if (set_mode) {
8900 task->t_flags |= TF_DARKWAKE_MODE;
8901 } else {
8902 task->t_flags &= ~(TF_DARKWAKE_MODE);
8903 }
8904
8905 task_unlock(task);
8906 }
8907
/* Lockless read of the task's darkwake-mode flag. */
boolean_t
task_get_darkwake_mode(task_t task)
{
	assert(task);
	return (task->t_flags & TF_DARKWAKE_MODE) != 0;
}
8914
8915 /*
8916 * Set default behavior for task's control port and EXC_GUARD variants that have
8917 * settable behavior.
8918 *
8919 * Platform binaries typically have one behavior, third parties another -
8920 * but there are special exception we may need to account for.
8921 */
/*
 * Select the task's EXC_GUARD behavior and control-port options at
 * task-setup time, based on platform-binary status, PID-1 special
 * casing, by-name overrides, and simulator/SDK version exemptions.
 * Finishes by applying the immovable/pinned control-port policy to the
 * task and its main thread.
 */
void
task_set_exc_guard_ctrl_port_default(
	task_t task,
	thread_t main_thread,
	const char *name,
	unsigned int namelen,
	boolean_t is_simulated,
	uint32_t platform,
	uint32_t sdk)
{
	task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;

	if (task_get_platform_binary(task)) {
		/* set exc guard default behavior for first-party code */
		task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);

		if (1 == task_pid(task)) {
			/* special flags for inittask - delivery every instance as corpse */
			task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
		} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
			/* honor by-name default setting overrides */

			int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);

			for (int i = 0; i < count; i++) {
				const struct task_exc_guard_named_default *named_default =
				    &task_exc_guard_named_defaults[i];
				/* a match requires equal length and identical bytes */
				if (strncmp(named_default->name, name, namelen) == 0 &&
				    strlen(named_default->name) == namelen) {
					task->task_exc_guard = named_default->behavior;
					break;
				}
			}
		}

		/* set control port options for 1p code, inherited from parent task by default */
		opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
	} else {
		/* set exc guard default behavior for third-party code */
		task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
		/* set control port options for 3p code, inherited from parent task by default */
		opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
	}

	if (is_simulated) {
		/* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
		if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
		    (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
			task->task_exc_guard = TASK_EXC_GUARD_NONE;
		}
		/* Disable protection for control ports for simulated binaries */
		opts = TASK_CONTROL_PORT_OPTIONS_NONE;
	}


	task_set_control_port_options(task, opts);

	task_set_immovable_pinned(task);
	main_thread_set_immovable_pinned(main_thread);
}
8983
/* Copy the task's current EXC_GUARD behavior out to the caller. */
kern_return_t
task_get_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t *behaviorp)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	*behaviorp = task->task_exc_guard;
	return KERN_SUCCESS;
}
8995
/*
 * Update the task's EXC_GUARD behavior.
 *
 * The request is clipped to task_exc_guard_config_mask.  On release
 * kernels the update is performed with an atomic read-modify-write
 * loop and only "upgrades" are allowed: bits in
 * task_exc_guard_no_unset_mask may not be cleared and bits in
 * task_exc_guard_no_set_mask may not be set (KERN_DENIED otherwise).
 */
kern_return_t
task_set_exc_guard_behavior(
	task_t task,
	task_exc_guard_behavior_t new_behavior)
{
	if (task == TASK_NULL) {
		return KERN_INVALID_TASK;
	}
	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
		return KERN_INVALID_VALUE;
	}

	/* limit setting to that allowed for this config */
	new_behavior = new_behavior & task_exc_guard_config_mask;

#if !defined (DEBUG) && !defined (DEVELOPMENT)
	/* On release kernels, only allow _upgrading_ exc guard behavior */
	task_exc_guard_behavior_t cur_behavior;

	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
		/* refusing to clear a protected bit? */
		if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* refusing to set a protected bit? */
		if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
		        os_atomic_rmw_loop_give_up(return KERN_DENIED);
		}

		/* no restrictions on CORPSE bit */
	});
#else
	task->task_exc_guard = new_behavior;
#endif
	return KERN_SUCCESS;
}
9031
9032 kern_return_t
task_set_corpse_forking_behavior(task_t task,task_corpse_forking_behavior_t behavior)9033 task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
9034 {
9035 #if DEVELOPMENT || DEBUG
9036 if (task == TASK_NULL) {
9037 return KERN_INVALID_TASK;
9038 }
9039
9040 task_lock(task);
9041 if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
9042 task->t_flags |= TF_NO_CORPSE_FORKING;
9043 } else {
9044 task->t_flags &= ~TF_NO_CORPSE_FORKING;
9045 }
9046 task_unlock(task);
9047
9048 return KERN_SUCCESS;
9049 #else
9050 (void)task;
9051 (void)behavior;
9052 return KERN_NOT_SUPPORTED;
9053 #endif
9054 }
9055
/*
 * Query whether corpse forking is disabled for this task.  Returns the
 * raw TF_NO_CORPSE_FORKING bit value (non-zero means disabled).
 */
boolean_t
task_corpse_forking_disabled(task_t task)
{
	boolean_t disabled = FALSE;

	task_lock(task);
	disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
	task_unlock(task);

	return disabled;
}
9067
9068 #if __arm64__
9069 extern int legacy_footprint_entitlement_mode;
9070 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
9071 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
9072
9073
/* Mark the task for legacy footprint accounting, under the task lock. */
void
task_set_legacy_footprint(
	task_t task)
{
	task_lock(task);
	task->task_legacy_footprint = TRUE;
	task_unlock(task);
}
9082
/*
 * One-shot grant of the extra footprint limit entitlement.  Uses a
 * double-checked pattern: the unlocked check is a fast path and the
 * locked re-check guarantees memorystatus is notified exactly once.
 */
void
task_set_extra_footprint_limit(
	task_t task)
{
	if (task->task_extra_footprint_limit) {
		return;
	}
	task_lock(task);
	if (task->task_extra_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_extra_footprint_limit = TRUE;
	task_unlock(task);
	/* notify memorystatus outside the task lock */
	memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
}
9099
/*
 * One-shot grant of the iOS 13 extended footprint limit.  Same
 * double-checked pattern as task_set_extra_footprint_limit(): the
 * locked re-check ensures memorystatus is notified exactly once.
 */
void
task_set_ios13extended_footprint_limit(
	task_t task)
{
	if (task->task_ios13extended_footprint_limit) {
		return;
	}
	task_lock(task);
	if (task->task_ios13extended_footprint_limit) {
		task_unlock(task);
		return;
	}
	task->task_ios13extended_footprint_limit = TRUE;
	task_unlock(task);
	/* notify memorystatus outside the task lock */
	memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
}
9116 #endif /* __arm64__ */
9117
9118 static inline ledger_amount_t
task_ledger_get_balance(ledger_t ledger,int ledger_idx)9119 task_ledger_get_balance(
9120 ledger_t ledger,
9121 int ledger_idx)
9122 {
9123 ledger_amount_t amount;
9124 amount = 0;
9125 ledger_get_balance(ledger, ledger_idx, &amount);
9126 return amount;
9127 }
9128
9129 /*
9130 * Gather the amount of memory counted in a task's footprint due to
9131 * being in a specific set of ledgers.
9132 */
/*
 * Sum the resident and compressed balances of every ledger that counts
 * toward the task's memory footprint.  Entries whose balance cannot be
 * read contribute zero (see task_ledger_get_balance()).
 */
void
task_ledgers_footprint(
	ledger_t        ledger,
	ledger_amount_t *ledger_resident,
	ledger_amount_t *ledger_compressed)
{
	*ledger_resident = 0;
	*ledger_compressed = 0;

	/* purgeable non-volatile memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);

	/* "default" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);

	/* "network" currently never counts in the footprint... */

	/* "media" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);

	/* "graphics" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);

	/* "neural" tagged memory */
	*ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
	*ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
}
9164
9165 #if CONFIG_MEMORYSTATUS
9166 /*
9167 * Credit any outstanding task dirty time to the ledger.
9168 * memstat_dirty_start is pushed forward to prevent any possibility of double
9169 * counting, making it safe to call this as often as necessary to ensure that
9170 * anyone reading the ledger gets up-to-date information.
9171 */
9172 void
task_ledger_settle_dirty_time(task_t t)9173 task_ledger_settle_dirty_time(task_t t)
9174 {
9175 task_lock(t);
9176
9177 uint64_t start = t->memstat_dirty_start;
9178 if (start) {
9179 uint64_t now = mach_absolute_time();
9180
9181 uint64_t duration;
9182 absolutetime_to_nanoseconds(now - start, &duration);
9183
9184 ledger_t ledger = get_task_ledger(t);
9185 ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);
9186
9187 t->memstat_dirty_start = now;
9188 }
9189
9190 task_unlock(t);
9191 }
9192 #endif /* CONFIG_MEMORYSTATUS */
9193
/*
 * Allow or disallow the task to transfer VM object ownership, set
 * under the task lock.
 */
void
task_set_memory_ownership_transfer(
	task_t task,
	boolean_t value)
{
	task_lock(task);
	task->task_can_transfer_memory_ownership = !!value;
	task_unlock(task);
}
9203
9204 #if DEVELOPMENT || DEBUG
9205
/*
 * Toggle the debug-only flag excluding this task from footprint
 * accounting, under the task lock.
 */
void
task_set_no_footprint_for_debug(task_t task, boolean_t value)
{
	task_lock(task);
	task->task_no_footprint_for_debug = !!value;
	task_unlock(task);
}
9213
/* Lockless read of the debug-only no-footprint flag. */
int
task_get_no_footprint_for_debug(task_t task)
{
	return task->task_no_footprint_for_debug;
}
9219
9220 #endif /* DEVELOPMENT || DEBUG */
9221
/*
 * Copy per-object accounting data for the task's owned VM objects.
 *
 * query: caller-provided output array, or NULL to only count objects.
 * len:   size of the query buffer in bytes.
 * num:   out — number of entries written (or total owned objects when
 *        query is NULL).
 *
 * Holds the task_objq lock for the duration of the walk.
 */
void
task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
{
	vm_object_t find_vmo;
	size_t size = 0;

	task_objq_lock(task);
	if (query != NULL) {
		queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
		{
			vm_object_query_t p = &query[size++];

			/* make sure to not overrun */
			if (size * sizeof(vm_object_query_data_t) > len) {
				--size;
				break;
			}

			bzero(p, sizeof(*p));
			/* object pointer is perturbed so it can't be mapped back to a kernel address */
			p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
			p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
			p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
			p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
			p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
			p->vo_no_footprint = find_vmo->vo_no_footprint;
			p->vo_ledger_tag = find_vmo->vo_ledger_tag;
			p->purgable = find_vmo->purgable;

			/* compressed pages are only tracked once a pager exists */
			if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
				p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
			} else {
				p->compressed_size = 0;
			}
		}
	} else {
		/* count-only mode */
		size = (size_t)task->task_owned_objects;
	}
	task_objq_unlock(task);

	*num = size;
}
9263
/*
 * Copy the task's owned-VM-object report into a caller buffer.
 *
 * buffer_size == 0 is a size query: *entries receives the object count
 * and *output_size the byte size needed (entries plus the list header).
 * Otherwise the entries are copied into buffer->data and the header's
 * entry count is filled in; *output_size reflects what was written.
 */
void
task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
{
	assert(output_size);
	assert(entries);

	/* copy the vmobjects and vmobject data out of the task */
	if (buffer_size == 0) {
		task_copy_vmobjects(task, NULL, 0, entries);
		*output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
	} else {
		assert(buffer);
		/* the data array starts after the list header, hence the size adjustment */
		task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
		buffer->entries = (uint64_t)*entries;
		*output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
	}
}
9281
9282 void
task_store_owned_vmobject_info(task_t to_task,task_t from_task)9283 task_store_owned_vmobject_info(task_t to_task, task_t from_task)
9284 {
9285 size_t buffer_size;
9286 vmobject_list_output_t buffer;
9287 size_t output_size;
9288 size_t entries;
9289
9290 assert(to_task != from_task);
9291
9292 /* get the size, allocate a bufferr, and populate */
9293 entries = 0;
9294 output_size = 0;
9295 task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);
9296
9297 if (output_size) {
9298 buffer_size = output_size;
9299 buffer = kalloc_data(buffer_size, Z_WAITOK);
9300
9301 if (buffer) {
9302 entries = 0;
9303 output_size = 0;
9304
9305 task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);
9306
9307 if (entries) {
9308 to_task->corpse_vmobject_list = buffer;
9309 to_task->corpse_vmobject_list_size = buffer_size;
9310 }
9311 }
9312 }
9313 }
9314
9315 void
task_set_filter_msg_flag(task_t task,boolean_t flag)9316 task_set_filter_msg_flag(
9317 task_t task,
9318 boolean_t flag)
9319 {
9320 assert(task != TASK_NULL);
9321
9322 if (flag) {
9323 task_ro_flags_set(task, TFRO_FILTER_MSG);
9324 } else {
9325 task_ro_flags_clear(task, TFRO_FILTER_MSG);
9326 }
9327 }
9328
9329 boolean_t
task_get_filter_msg_flag(task_t task)9330 task_get_filter_msg_flag(
9331 task_t task)
9332 {
9333 if (!task) {
9334 return false;
9335 }
9336
9337 return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
9338 }
9339 bool
task_is_exotic(task_t task)9340 task_is_exotic(
9341 task_t task)
9342 {
9343 if (task == TASK_NULL) {
9344 return false;
9345 }
9346 return vm_map_is_exotic(get_task_map(task));
9347 }
9348
9349 bool
task_is_alien(task_t task)9350 task_is_alien(
9351 task_t task)
9352 {
9353 if (task == TASK_NULL) {
9354 return false;
9355 }
9356 return vm_map_is_alien(get_task_map(task));
9357 }
9358
9359
9360
9361 #if CONFIG_MACF
/* Set the filter mask for Mach traps (MACF policy hook). */
void
mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_trap_filter_mask(task, maskptr);
}
9370
/* Set the filter mask for kobject msgs (MACF policy hook). */
void
mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
{
	assert(task);

	task_set_mach_kobj_filter_mask(task, maskptr);
}
9379
/*
 * Hook for mach trap/sc filter evaluation policy.  May be installed at
 * most once via mac_task_register_filter_callbacks().
 */
SECURITY_READ_ONLY_LATE(mac_task_mach_filter_cbfunc_t) mac_task_mach_trap_evaluate = NULL;

/*
 * Hook for kobj message filter evaluation policy.  May be installed at
 * most once via mac_task_register_filter_callbacks().
 */
SECURITY_READ_ONLY_LATE(mac_task_kobj_filter_cbfunc_t) mac_task_kobj_msg_evaluate = NULL;
9385
9386 /* Set the callback hooks for the filtering policy. */
9387 int
mac_task_register_filter_callbacks(const mac_task_mach_filter_cbfunc_t mach_cbfunc,const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)9388 mac_task_register_filter_callbacks(
9389 const mac_task_mach_filter_cbfunc_t mach_cbfunc,
9390 const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
9391 {
9392 if (mach_cbfunc != NULL) {
9393 if (mac_task_mach_trap_evaluate != NULL) {
9394 return KERN_FAILURE;
9395 }
9396 mac_task_mach_trap_evaluate = mach_cbfunc;
9397 }
9398 if (kobj_cbfunc != NULL) {
9399 if (mac_task_kobj_msg_evaluate != NULL) {
9400 return KERN_FAILURE;
9401 }
9402 mac_task_kobj_msg_evaluate = kobj_cbfunc;
9403 }
9404
9405 return KERN_SUCCESS;
9406 }
9407 #endif /* CONFIG_MACF */
9408
9409 #if CONFIG_ROSETTA
/* True when the task's proc is running translated (Rosetta). */
bool
task_is_translated(task_t task)
{
	extern boolean_t proc_is_translated(struct proc* p);
	return task && proc_is_translated(get_bsdtask_info(task));
}
9416 #endif
9417
9418
9419 #if __has_feature(ptrauth_calls)
9420 /* All pac violations will be delivered as fatal exceptions irrespective of
9421 * the enable_pac_exception boot-arg value.
9422 */
9423 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
9424 /*
9425 * When enable_pac_exception boot-arg is set to true, processes
9426 * can choose to get non-fatal pac exception delivery by setting
9427 * this entitlement.
9428 */
9429 #define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
9430
9431 void
task_set_pac_exception_fatal_flag(task_t task)9432 task_set_pac_exception_fatal_flag(
9433 task_t task)
9434 {
9435 assert(task != TASK_NULL);
9436 bool pac_entitlement = false;
9437 uint32_t set_flags = 0;
9438
9439 if (enable_pac_exception && IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
9440 return;
9441 }
9442
9443 if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT)) {
9444 pac_entitlement = true;
9445 }
9446
9447 if (pac_entitlement) {
9448 set_flags |= TFRO_PAC_ENFORCE_USER_STATE;
9449 }
9450 if (pac_entitlement || (enable_pac_exception && task_get_platform_binary(task))) {
9451 set_flags |= TFRO_PAC_EXC_FATAL;
9452 }
9453 if (set_flags != 0) {
9454 task_ro_flags_set(task, set_flags);
9455 }
9456 }
9457
/* Should PAC violations be fatal for this task? */
bool
task_is_pac_exception_fatal(
	task_t task)
{
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
}
9465 #endif /* __has_feature(ptrauth_calls) */
9466
/* Must user thread state for this task carry a valid PAC signature? */
bool
task_needs_user_signed_thread_state(
	task_t task)
{
	assert(task != TASK_NULL);
	return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
}
9474
9475 void
task_set_tecs(task_t task)9476 task_set_tecs(task_t task)
9477 {
9478 if (task == TASK_NULL) {
9479 task = current_task();
9480 }
9481
9482 if (!machine_csv(CPUVN_CI)) {
9483 return;
9484 }
9485
9486 LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);
9487
9488 task_lock(task);
9489
9490 task->t_flags |= TF_TECS;
9491
9492 thread_t thread;
9493 queue_iterate(&task->threads, thread, thread_t, task_threads) {
9494 machine_tecs(thread);
9495 }
9496 task_unlock(task);
9497 }
9498
/*
 * Test-only (DEVELOPMENT/DEBUG): block on a synchronous kernel upcall
 * to the given send port, then release the send right.  The caller must
 * be targeting its own task.
 */
kern_return_t
task_test_sync_upcall(
	task_t task,
	ipc_port_t send_port)
{
#if DEVELOPMENT || DEBUG
	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Block on sync kernel upcall on the given send port */
	mach_test_sync_upcall(send_port);

	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	return KERN_NOT_SUPPORTED;
#endif
}
9520
/*
 * Test-only (DEVELOPMENT/DEBUG): verify QoS/IO-tier propagation through
 * an asynchronous kernel upcall.  Applies the given attributes to the
 * port, performs the upcall with send-importance enabled, and releases
 * the send right.  The caller must be targeting its own task.
 */
kern_return_t
task_test_async_upcall_propagation(
	task_t task,
	ipc_port_t send_port,
	int qos,
	int iotier)
{
#if DEVELOPMENT || DEBUG
	kern_return_t kr;

	if (task != current_task() || !IPC_PORT_VALID(send_port)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Both attributes must be inside their valid ranges. */
	if (qos < THREAD_QOS_DEFAULT || qos > THREAD_QOS_USER_INTERACTIVE ||
	    iotier < THROTTLE_LEVEL_START || iotier > THROTTLE_LEVEL_END) {
		return KERN_INVALID_ARGUMENT;
	}

	struct thread_attr_for_ipc_propagation attr = {
		.tafip_iotier = iotier,
		.tafip_qos = qos
	};

	/* Apply propagate attr to port */
	kr = ipc_port_propagate_thread_attr(send_port, attr);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	thread_enable_send_importance(current_thread(), TRUE);

	/* Perform an async kernel upcall on the given send port */
	mach_test_async_upcall(send_port);
	thread_enable_send_importance(current_thread(), FALSE);

	ipc_port_release_send(send_port);
	return KERN_SUCCESS;
#else
	(void)task;
	(void)send_port;
	(void)qos;
	(void)iotier;
	return KERN_NOT_SUPPORTED;
#endif
}
9567
9568 #if CONFIG_PROC_RESOURCE_LIMITS
9569 mach_port_name_t
current_task_get_fatal_port_name(void)9570 current_task_get_fatal_port_name(void)
9571 {
9572 mach_port_t task_fatal_port = MACH_PORT_NULL;
9573 mach_port_name_t port_name = 0;
9574
9575 task_fatal_port = task_allocate_fatal_port();
9576
9577 if (task_fatal_port) {
9578 ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
9579 IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
9580 }
9581
9582 return port_name;
9583 }
9584 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
9585
9586 #if defined(__x86_64__)
9587 bool
curtask_get_insn_copy_optout(void)9588 curtask_get_insn_copy_optout(void)
9589 {
9590 bool optout;
9591 task_t cur_task = current_task();
9592
9593 task_lock(cur_task);
9594 optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9595 task_unlock(cur_task);
9596
9597 return optout;
9598 }
9599
9600 void
curtask_set_insn_copy_optout(void)9601 curtask_set_insn_copy_optout(void)
9602 {
9603 task_t cur_task = current_task();
9604
9605 task_lock(cur_task);
9606
9607 cur_task->t_flags |= TF_INSN_COPY_OPTOUT;
9608
9609 thread_t thread;
9610 queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
9611 machine_thread_set_insn_copy_optout(thread);
9612 }
9613 task_unlock(cur_task);
9614 }
9615 #endif /* defined(__x86_64__) */
9616
9617 void
task_get_corpse_vmobject_list(task_t task,vmobject_list_output_t * list,size_t * list_size)9618 task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9619 {
9620 assert(task);
9621 assert(list_size);
9622
9623 *list = task->corpse_vmobject_list;
9624 *list_size = (size_t)task->corpse_vmobject_list_size;
9625 }
9626
/*
 * Panic with diagnostic pointers when a proc_ro structure's back
 * reference (proc_ro_task(ro)) does not point at the task it was
 * looked up from — i.e. the task/proc_ro pairing is inconsistent.
 */
__abortlike
static void
panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
{
	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
	    "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
}
9634
/*
 * Return the task's read-only proc structure (bsd_info_ro), validating
 * it before use: the pointer must lie in the PROC_RO read-only zone and
 * its back reference must point at this task.  Any mismatch panics, so
 * callers always receive a verified proc_ro_t.
 */
proc_ro_t
task_get_ro(task_t t)
{
	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;

	/* Panics if ro is not a valid element of the read-only proc zone. */
	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(proc_ro_task(ro) != t)) {
		panic_proc_ro_task_backref_mismatch(t, ro);
	}

	return ro;
}
9647
9648 uint32_t
task_ro_flags_get(task_t task)9649 task_ro_flags_get(task_t task)
9650 {
9651 return task_get_ro(task)->t_flags_ro;
9652 }
9653
/*
 * Atomically OR the given bits into the task's read-only flag word.
 * The storage lives in the read-only PROC_RO zone, so the write must go
 * through the zalloc read-only atomic update path.
 */
void
task_ro_flags_set(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_OR_32, flags);
}
9660
/*
 * Atomically clear the given bits in the task's read-only flag word
 * (AND with the complement), via the zalloc read-only update path.
 */
void
task_ro_flags_clear(task_t task, uint32_t flags)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
	    t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
}
9667
9668 task_control_port_options_t
task_get_control_port_options(task_t task)9669 task_get_control_port_options(task_t task)
9670 {
9671 return task_get_ro(task)->task_control_port_options;
9672 }
9673
/*
 * Store new control port options into the task's read-only proc data.
 * Non-atomic write through the zalloc read-only update path.
 */
void
task_set_control_port_options(task_t task, task_control_port_options_t opts)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
	    task_control_port_options, &opts);
}
9680
9681 /*!
9682 * @function kdp_task_is_locked
9683 *
9684 * @abstract
9685 * Checks if task is locked.
9686 *
9687 * @discussion
9688 * NOT SAFE: To be used only by kernel debugger.
9689 *
9690 * @param task task to check
9691 *
9692 * @returns TRUE if the task is locked.
9693 */
boolean_t
kdp_task_is_locked(task_t task)
{
	/* Debugger-only: inspects the mutex state without synchronization. */
	return kdp_lck_mtx_lock_spin_is_acquired(&task->lock);
}
9699
#if DEBUG || DEVELOPMENT
/**
 * Check whether a new threshold limit is valid relative to the task's
 * other footprint limit.  If the two limits are the same, race
 * conditions may arise, so we must prevent that from happening.
 */
static diagthreshold_check_return
task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value)
{
	int phys_limit_mb;
	kern_return_t ret_value;
	bool threshold_enabled;
	bool dummy;
	/* Is the diagnostics threshold trigger currently enabled on this ledger? */
	ret_value = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, &threshold_enabled);
	if (ret_value != KERN_SUCCESS) {
		/*
		 * NOTE(review): on failure a raw kern_return_t is funneled
		 * through the diagthreshold_check_return type — callers must
		 * be prepared for KERN_* codes as well as THRESHOLD_* values.
		 */
		return ret_value;
	}
	/*
	 * Cross-compare: a new diagnostics threshold is checked against the
	 * current phys-footprint limit, and a new phys-footprint limit is
	 * checked against the current diagnostics threshold.
	 */
	if (is_diagnostics_value == true) {
		ret_value = task_get_phys_footprint_limit(task, &phys_limit_mb);
	} else {
		uint64_t diag_limit;
		ret_value = task_get_diag_footprint_limit_internal(task, &diag_limit, &dummy);
		phys_limit_mb = (int)(diag_limit >> 20);  /* bytes -> MB */
	}
	if (ret_value != KERN_SUCCESS) {
		return ret_value;
	}
	/* Encode (same / not-same, trigger enabled / disabled) for the caller. */
	if (phys_limit_mb == (int) new_limit) {
		if (threshold_enabled == false) {
			return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED;
		} else {
			return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED;
		}
	}
	if (threshold_enabled == false) {
		return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED;
	} else {
		return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED;
	}
}
#endif
9742
9743
9744 #pragma mark task utils
9745
9746 /* defined in bsd/kern/kern_proc.c */
9747 extern void proc_name(int pid, char *buf, int size);
9748 extern char *proc_best_name(struct proc *p);
9749
/*
 * Copy the BSD process name for the task's pid into buf (at most size
 * bytes), via proc_name().
 */
void
task_procname(task_t task, char *buf, int size)
{
	proc_name(task_pid(task), buf, size);
}
9755
9756 void
task_best_name(task_t task,char * buf,size_t size)9757 task_best_name(task_t task, char *buf, size_t size)
9758 {
9759 char *name = proc_best_name(task_get_proc_raw(task));
9760 strlcpy(buf, name, size);
9761 }
9762