xref: /xnu-8792.81.2/bsd/vm/vm_unix.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Mach Operating System
30  * Copyright (c) 1987 Carnegie-Mellon University
31  * All rights reserved.  The CMU software License Agreement specifies
32  * the terms and conditions for use and redistribution.
33  */
34 /*
35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36  * support for mandatory and extensible security protections.  This notice
37  * is included in support of clause 2.2 (b) of the Apple Public License,
38  * Version 2.0.
39  */
40 #include <vm/vm_options.h>
41 
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57 
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #if NECP
86 #include <net/necp.h>
87 #endif /* NECP */
88 #if SKYWALK
89 #include <skywalk/os_channel.h>
90 #endif /* SKYWALK */
91 
92 #include <security/audit/audit.h>
93 #include <security/mac.h>
94 #include <bsm/audit_kevents.h>
95 
96 #include <kern/kalloc.h>
97 #include <vm/vm_map.h>
98 #include <vm/vm_kern.h>
99 #include <vm/vm_pageout.h>
100 
101 #include <mach/shared_region.h>
102 #include <vm/vm_shared_region.h>
103 
104 #include <vm/vm_dyld_pager.h>
105 
106 #include <vm/vm_protos.h>
107 
108 #include <sys/kern_memorystatus.h>
109 #include <sys/kern_memorystatus_freeze.h>
110 #include <sys/proc_internal.h>
111 
112 #include <mach-o/fixup-chains.h>
113 
114 #if CONFIG_MACF
115 #include <security/mac_framework.h>
116 #endif
117 
118 #include <kern/bits.h>
119 
120 #if CONFIG_CSR
121 #include <sys/csr.h>
122 #endif /* CONFIG_CSR */
123 #include <sys/trust_caches.h>
124 #include <libkern/amfi/amfi.h>
125 #include <IOKit/IOBSD.h>
126 
#if VM_MAP_DEBUG_APPLE_PROTECT
/* Debug knob for the apple_protect pager map paths (debug builds only). */
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
/* Debug knob for the 4K-page compatibility (fourk) map paths. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
134 
#if DEVELOPMENT || DEBUG

/*
 * Test-only sysctl (vm.kmem_alloc_contig): attempt a physically contiguous
 * kernel allocation of the written size, then immediately free it on success.
 * Only the sysctl plumbing status is reported back to the caller; the
 * allocation outcome itself is intentionally discarded.
 */
static int
sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	vm_offset_t     kaddr;
	kern_return_t   kr;
	int     error = 0;
	int     size = 0;

	error = sysctl_handle_int(oidp, &size, 0, req);
	if (error || !req->newptr) {
		/* Handler error, or a read-only access: nothing to allocate. */
		return error;
	}

	/*
	 * Reject nonpositive sizes: casting a negative int to vm_size_t
	 * would turn it into an enormous allocation request.
	 */
	if (size <= 0) {
		return EINVAL;
	}

	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);

	if (kr == KERN_SUCCESS) {
		kmem_free(kernel_map, kaddr, size);
	}

	return error;
}

SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");

extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");

#endif /* DEVELOPMENT || DEBUG */
168 
169 static int
170 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
171 {
172 #pragma unused(arg1, arg2, oidp)
173 	int     error = 0;
174 	int     value;
175 
176 	value = task_self_region_footprint();
177 	error = SYSCTL_OUT(req, &value, sizeof(int));
178 	if (error) {
179 		return error;
180 	}
181 
182 	if (!req->newptr) {
183 		return 0;
184 	}
185 
186 	error = SYSCTL_IN(req, &value, sizeof(int));
187 	if (error) {
188 		return error;
189 	}
190 	task_self_region_footprint_set(value);
191 	return 0;
192 }
193 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
194 
195 static int
196 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
197 {
198 #pragma unused(arg1, arg2, oidp)
199 	int     error = 0;
200 	int     value;
201 
202 	value = (1 << thread_self_region_page_shift());
203 	error = SYSCTL_OUT(req, &value, sizeof(int));
204 	if (error) {
205 		return error;
206 	}
207 
208 	if (!req->newptr) {
209 		return 0;
210 	}
211 
212 	error = SYSCTL_IN(req, &value, sizeof(int));
213 	if (error) {
214 		return error;
215 	}
216 
217 	if (value != 0 && value != 4096 && value != 16384) {
218 		return EINVAL;
219 	}
220 
221 #if !__ARM_MIXED_PAGE_SIZE__
222 	if (value != vm_map_page_size(current_map())) {
223 		return EINVAL;
224 	}
225 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
226 
227 	thread_self_region_page_shift_set(bit_first(value));
228 	return 0;
229 }
230 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
231 
232 
233 #if DEVELOPMENT || DEBUG
234 extern int panic_on_unsigned_execute;
235 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
236 #endif /* DEVELOPMENT || DEBUG */
237 
238 extern int cs_executable_create_upl;
239 extern int cs_executable_wire;
240 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
241 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
242 
243 extern int apple_protect_pager_count;
244 extern int apple_protect_pager_count_mapped;
245 extern unsigned int apple_protect_pager_cache_limit;
246 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
247 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
248 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
249 
#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
/* Free-list counters for translation-table pages. */
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#if DEVELOPMENT || DEBUG
/* ASID (address space ID) flush/hit/miss counters from the pmap layer. */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */
301 
302 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
303 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
304 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
305 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
306 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
307 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
308 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
309 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
310 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;    /* Disabled by default */
extern int proc_shadow_max(void);

/*
 * vm.vm_shadow_max: reports proc_shadow_max() (presumably the deepest VM
 * object shadow chain for the caller — see proc_shadow_max), or 0 while the
 * scan is disabled via vm.vm_shadow_max_enabled.
 */
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int depth = 0;

	if (vm_shadow_max_enabled) {
		depth = proc_shadow_max();
	}

	return SYSCTL_OUT(req, &depth, sizeof(depth));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
332 
333 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
334 
335 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
336 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
337 /*
338  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
339  */
340 
341 #if DEVELOPMENT || DEBUG
342 extern int allow_stack_exec, allow_data_exec;
343 
344 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
345 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
346 
347 #endif /* DEVELOPMENT || DEBUG */
348 
/*
 * Human-readable names for protection combinations, indexed by
 * (prot & VM_PROT_ALL).  The order follows the read/write/execute bit
 * encoding, so do not reorder these entries.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
359 
360 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)361 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
362 {
363 	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
364 	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
365 }
366 
367 /*
368  * shared_region_unnest_logging: level of logging of unnesting events
369  * 0	- no logging
370  * 1	- throttled logging of unexpected unnesting events (default)
371  * 2	- unthrottled logging of unexpected unnesting events
372  * 3+	- unthrottled logging of all unnesting events
373  */
374 int shared_region_unnest_logging = 1;
375 
376 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
377     &shared_region_unnest_logging, 0, "");
378 
379 int vm_shared_region_unnest_log_interval = 10;
380 int shared_region_unnest_log_count_threshold = 5;
381 
382 
383 #if XNU_TARGET_OS_OSX
384 
385 #if defined (__x86_64__)
386 static int scdir_enforce = 1;
387 #else /* defined (__x86_64__) */
388 static int scdir_enforce = 0;   /* AOT caches live elsewhere */
389 #endif /* defined (__x86_64__) */
390 
391 static char *scdir_path[] = {
392 	"/System/Library/dyld/",
393 	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
394 	"/System/Cryptexes/OS/System/Library/dyld",
395 	NULL
396 };
397 
398 #else /* XNU_TARGET_OS_OSX */
399 
400 static int scdir_enforce = 0;
401 static char *scdir_path[] = {
402 	"/System/Library/Caches/com.apple.dyld/",
403 	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
404 	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
405 	NULL
406 };
407 
408 #endif /* XNU_TARGET_OS_OSX */
409 
410 static char *driverkit_scdir_path[] = {
411 	"/System/DriverKit/System/Library/dyld/",
412 #if XNU_TARGET_OS_OSX
413 	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
414 #else
415 	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
416 #endif /* XNU_TARGET_OS_OSX */
417 	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
418 	NULL
419 };
420 
421 #ifndef SECURE_KERNEL
422 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
423 {
424 #if CONFIG_CSR
425 	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
426 		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
427 		return EPERM;
428 	}
429 #endif /* CONFIG_CSR */
430 	return sysctl_handle_int(oidp, arg1, arg2, req);
431 }
432 
433 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
434 #endif
435 
436 /* These log rate throttling state variables aren't thread safe, but
437  * are sufficient unto the task.
438  */
439 static int64_t last_unnest_log_time = 0;
440 static int shared_region_unnest_log_count = 0;
441 
442 void
log_unnest_badness(vm_map_t m,vm_map_offset_t s,vm_map_offset_t e,boolean_t is_nested_map,vm_map_offset_t lowest_unnestable_addr)443 log_unnest_badness(
444 	vm_map_t        m,
445 	vm_map_offset_t s,
446 	vm_map_offset_t e,
447 	boolean_t       is_nested_map,
448 	vm_map_offset_t lowest_unnestable_addr)
449 {
450 	struct timeval  tv;
451 
452 	if (shared_region_unnest_logging == 0) {
453 		return;
454 	}
455 
456 	if (shared_region_unnest_logging <= 2 &&
457 	    is_nested_map &&
458 	    s >= lowest_unnestable_addr) {
459 		/*
460 		 * Unnesting of writable map entries is fine.
461 		 */
462 		return;
463 	}
464 
465 	if (shared_region_unnest_logging <= 1) {
466 		microtime(&tv);
467 		if ((tv.tv_sec - last_unnest_log_time) <
468 		    vm_shared_region_unnest_log_interval) {
469 			if (shared_region_unnest_log_count++ >
470 			    shared_region_unnest_log_count_threshold) {
471 				return;
472 			}
473 		} else {
474 			last_unnest_log_time = tv.tv_sec;
475 			shared_region_unnest_log_count = 0;
476 		}
477 	}
478 
479 	DTRACE_VM4(log_unnest_badness,
480 	    vm_map_t, m,
481 	    vm_map_offset_t, s,
482 	    vm_map_offset_t, e,
483 	    vm_map_offset_t, lowest_unnestable_addr);
484 	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
485 }
486 
487 int
useracc(user_addr_t addr,user_size_t len,int prot)488 useracc(
489 	user_addr_t     addr,
490 	user_size_t     len,
491 	int     prot)
492 {
493 	vm_map_t        map;
494 
495 	map = current_map();
496 	return vm_map_check_protection(
497 		map,
498 		vm_map_trunc_page(addr,
499 		vm_map_page_mask(map)),
500 		vm_map_round_page(addr + len,
501 		vm_map_page_mask(map)),
502 		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
503 }
504 
505 int
vslock(user_addr_t addr,user_size_t len)506 vslock(
507 	user_addr_t     addr,
508 	user_size_t     len)
509 {
510 	kern_return_t   kret;
511 	vm_map_t        map;
512 
513 	map = current_map();
514 	kret = vm_map_wire_kernel(map,
515 	    vm_map_trunc_page(addr,
516 	    vm_map_page_mask(map)),
517 	    vm_map_round_page(addr + len,
518 	    vm_map_page_mask(map)),
519 	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
520 	    FALSE);
521 
522 	switch (kret) {
523 	case KERN_SUCCESS:
524 		return 0;
525 	case KERN_INVALID_ADDRESS:
526 	case KERN_NO_SPACE:
527 		return ENOMEM;
528 	case KERN_PROTECTION_FAILURE:
529 		return EACCES;
530 	default:
531 		return EINVAL;
532 	}
533 }
534 
535 int
vsunlock(user_addr_t addr,user_size_t len,__unused int dirtied)536 vsunlock(
537 	user_addr_t addr,
538 	user_size_t len,
539 	__unused int dirtied)
540 {
541 #if FIXME  /* [ */
542 	pmap_t          pmap;
543 	vm_page_t       pg;
544 	vm_map_offset_t vaddr;
545 	ppnum_t         paddr;
546 #endif  /* FIXME ] */
547 	kern_return_t   kret;
548 	vm_map_t        map;
549 
550 	map = current_map();
551 
552 #if FIXME  /* [ */
553 	if (dirtied) {
554 		pmap = get_task_pmap(current_task());
555 		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
556 		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
557 		    vaddr += PAGE_SIZE) {
558 			paddr = pmap_find_phys(pmap, vaddr);
559 			pg = PHYS_TO_VM_PAGE(paddr);
560 			vm_page_set_modified(pg);
561 		}
562 	}
563 #endif  /* FIXME ] */
564 #ifdef  lint
565 	dirtied++;
566 #endif  /* lint */
567 	kret = vm_map_unwire(map,
568 	    vm_map_trunc_page(addr,
569 	    vm_map_page_mask(map)),
570 	    vm_map_round_page(addr + len,
571 	    vm_map_page_mask(map)),
572 	    FALSE);
573 	switch (kret) {
574 	case KERN_SUCCESS:
575 		return 0;
576 	case KERN_INVALID_ADDRESS:
577 	case KERN_NO_SPACE:
578 		return ENOMEM;
579 	case KERN_PROTECTION_FAILURE:
580 		return EACCES;
581 	default:
582 		return EINVAL;
583 	}
584 }
585 
586 int
subyte(user_addr_t addr,int byte)587 subyte(
588 	user_addr_t addr,
589 	int byte)
590 {
591 	char character;
592 
593 	character = (char)byte;
594 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
595 }
596 
597 int
suibyte(user_addr_t addr,int byte)598 suibyte(
599 	user_addr_t addr,
600 	int byte)
601 {
602 	char character;
603 
604 	character = (char)byte;
605 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
606 }
607 
608 int
fubyte(user_addr_t addr)609 fubyte(user_addr_t addr)
610 {
611 	unsigned char byte;
612 
613 	if (copyin(addr, (void *) &byte, sizeof(char))) {
614 		return -1;
615 	}
616 	return byte;
617 }
618 
619 int
fuibyte(user_addr_t addr)620 fuibyte(user_addr_t addr)
621 {
622 	unsigned char byte;
623 
624 	if (copyin(addr, (void *) &(byte), sizeof(char))) {
625 		return -1;
626 	}
627 	return byte;
628 }
629 
630 int
suword(user_addr_t addr,long word)631 suword(
632 	user_addr_t addr,
633 	long word)
634 {
635 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
636 }
637 
638 long
fuword(user_addr_t addr)639 fuword(user_addr_t addr)
640 {
641 	long word = 0;
642 
643 	if (copyin(addr, (void *) &word, sizeof(int))) {
644 		return -1;
645 	}
646 	return word;
647 }
648 
649 /* suiword and fuiword are the same as suword and fuword, respectively */
650 
651 int
suiword(user_addr_t addr,long word)652 suiword(
653 	user_addr_t addr,
654 	long word)
655 {
656 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
657 }
658 
659 long
fuiword(user_addr_t addr)660 fuiword(user_addr_t addr)
661 {
662 	long word = 0;
663 
664 	if (copyin(addr, (void *) &word, sizeof(int))) {
665 		return -1;
666 	}
667 	return word;
668 }
669 
670 /*
671  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
672  * fetching and setting of process-sized size_t and pointer values.
673  */
674 int
sulong(user_addr_t addr,int64_t word)675 sulong(user_addr_t addr, int64_t word)
676 {
677 	if (IS_64BIT_PROCESS(current_proc())) {
678 		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
679 	} else {
680 		return suiword(addr, (long)word);
681 	}
682 }
683 
684 int64_t
fulong(user_addr_t addr)685 fulong(user_addr_t addr)
686 {
687 	int64_t longword;
688 
689 	if (IS_64BIT_PROCESS(current_proc())) {
690 		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
691 			return -1;
692 		}
693 		return longword;
694 	} else {
695 		return (int64_t)fuiword(addr);
696 	}
697 }
698 
699 int
suulong(user_addr_t addr,uint64_t uword)700 suulong(user_addr_t addr, uint64_t uword)
701 {
702 	if (IS_64BIT_PROCESS(current_proc())) {
703 		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
704 	} else {
705 		return suiword(addr, (uint32_t)uword);
706 	}
707 }
708 
709 uint64_t
fuulong(user_addr_t addr)710 fuulong(user_addr_t addr)
711 {
712 	uint64_t ulongword;
713 
714 	if (IS_64BIT_PROCESS(current_proc())) {
715 		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
716 			return -1ULL;
717 		}
718 		return ulongword;
719 	} else {
720 		return (uint64_t)fuiword(addr);
721 	}
722 }
723 
724 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)725 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
726 {
727 	return ENOTSUP;
728 }
729 
730 /*
731  * pid_for_task
732  *
733  * Find the BSD process ID for the Mach task associated with the given Mach port
734  * name
735  *
736  * Parameters:	args		User argument descriptor (see below)
737  *
738  * Indirect parameters:	args->t		Mach port name
739  *                      args->pid	Process ID (returned value; see below)
740  *
741  * Returns:	KERL_SUCCESS	Success
742  *              KERN_FAILURE	Not success
743  *
744  * Implicit returns: args->pid		Process ID
745  *
746  */
747 kern_return_t
pid_for_task(struct pid_for_task_args * args)748 pid_for_task(
749 	struct pid_for_task_args *args)
750 {
751 	mach_port_name_t        t = args->t;
752 	user_addr_t             pid_addr  = args->pid;
753 	proc_t p;
754 	task_t          t1;
755 	int     pid = -1;
756 	kern_return_t   err = KERN_SUCCESS;
757 
758 	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
759 	AUDIT_ARG(mach_port1, t);
760 
761 	t1 = port_name_to_task_name(t);
762 
763 	if (t1 == TASK_NULL) {
764 		err = KERN_FAILURE;
765 		goto pftout;
766 	} else {
767 		p = get_bsdtask_info(t1);
768 		if (p) {
769 			pid  = proc_pid(p);
770 			err = KERN_SUCCESS;
771 		} else if (is_corpsetask(t1)) {
772 			pid = task_pid(t1);
773 			err = KERN_SUCCESS;
774 		} else {
775 			err = KERN_FAILURE;
776 		}
777 	}
778 	task_deallocate(t1);
779 pftout:
780 	AUDIT_ARG(pid, pid);
781 	(void) copyout((char *) &pid, pid_addr, sizeof(int));
782 	AUDIT_MACH_SYSCALL_EXIT(err);
783 	return err;
784 }
785 
786 /*
787  *
788  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
789  * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
790  *
791  */
792 static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
793 
794 /*
795  *	Routine:	task_for_pid_posix_check
796  *	Purpose:
797  *			Verify that the current process should be allowed to
798  *			get the target process's task port. This is only
799  *			permitted if:
800  *			- The current process is root
801  *			OR all of the following are true:
802  *			- The target process's real, effective, and saved uids
803  *			  are the same as the current proc's euid,
804  *			- The target process's group set is a subset of the
805  *			  calling process's group set, and
806  *			- The target process hasn't switched credentials.
807  *
808  *	Returns:	TRUE: permitted
809  *			FALSE: denied
810  */
811 static int
task_for_pid_posix_check(proc_t target)812 task_for_pid_posix_check(proc_t target)
813 {
814 	kauth_cred_t targetcred, mycred;
815 	bool checkcredentials;
816 	uid_t myuid;
817 	int allowed;
818 
819 	/* No task_for_pid on bad targets */
820 	if (target->p_stat == SZOMB) {
821 		return FALSE;
822 	}
823 
824 	mycred = kauth_cred_get();
825 	myuid = kauth_cred_getuid(mycred);
826 
827 	/* If we're running as root, the check passes */
828 	if (kauth_cred_issuser(mycred)) {
829 		return TRUE;
830 	}
831 
832 	/* We're allowed to get our own task port */
833 	if (target == current_proc()) {
834 		return TRUE;
835 	}
836 
837 	/*
838 	 * Under DENY, only root can get another proc's task port,
839 	 * so no more checks are needed.
840 	 */
841 	if (tfp_policy == KERN_TFP_POLICY_DENY) {
842 		return FALSE;
843 	}
844 
845 	targetcred = kauth_cred_proc_ref(target);
846 	allowed = TRUE;
847 
848 	checkcredentials = !proc_is_third_party_debuggable_driver(target);
849 
850 	if (checkcredentials) {
851 		/* Do target's ruid, euid, and saved uid match my euid? */
852 		if ((kauth_cred_getuid(targetcred) != myuid) ||
853 		    (kauth_cred_getruid(targetcred) != myuid) ||
854 		    (kauth_cred_getsvuid(targetcred) != myuid)) {
855 			allowed = FALSE;
856 			goto out;
857 		}
858 		/* Are target's groups a subset of my groups? */
859 		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
860 		    allowed == 0) {
861 			allowed = FALSE;
862 			goto out;
863 		}
864 	}
865 
866 	/* Has target switched credentials? */
867 	if (target->p_flag & P_SUGID) {
868 		allowed = FALSE;
869 		goto out;
870 	}
871 
872 out:
873 	kauth_cred_unref(&targetcred);
874 	return allowed;
875 }
876 
877 /*
878  *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
879  *
880  *	Description:	Waits for the user space daemon to respond to the request
881  *			we made. Function declared non inline to be visible in
882  *			stackshots and spindumps as well as debugging.
883  */
884 __attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(mach_port_t task_access_port,int32_t calling_pid,uint32_t calling_gid,int32_t target_pid,mach_task_flavor_t flavor)885 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
886 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
887 {
888 	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
889 }
890 
891 /*
892  *	Routine:	task_for_pid
893  *	Purpose:
894  *		Get the task port for another "process", named by its
895  *		process ID on the same host as "target_task".
896  *
897  *		Only permitted to privileged processes, or processes
898  *		with the same user ID.
899  *
900  *		Note: if pid == 0, an error is return no matter who is calling.
901  *
902  * XXX This should be a BSD system call, not a Mach trap!!!
903  */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	void                    * sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/*
	 * Always check if pid == 0: refuse the request, but still copy out
	 * MACH_PORT_NULL so the caller's out-parameter is deterministic.
	 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Validate the caller-supplied task port; its ref is dropped at tfpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Snapshot identity now; the proc ref is dropped before any upcall. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* Enforce the POSIX uid/gid-based task_for_pid policy. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* No task (e.g. exiting process): report success with MACH_PORT_NULL. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held across upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	/* MAC hook: may veto handing out a control-flavor task port. */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (is_corpsetask(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* Move the send right into the caller's IPC space as a port name. */
	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: release every ref still held and report the result. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1051 
1052 /*
1053  *	Routine:	task_name_for_pid
1054  *	Purpose:
1055  *		Get the task name port for another "process", named by its
1056  *		process ID on the same host as "target_task".
1057  *
1058  *		Only permitted to privileged processes, or processes
1059  *		with the same user ID.
1060  *
1061  * XXX This should be a BSD system call, not a Mach trap!!!
1062  */
1063 
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller-supplied task port; its ref is dropped on exit. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		/* Hold the target's credential for the uid comparisons below. */
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Permitted when the target is not a zombie and the caller is
		 * the target itself, is superuser, or matches both the
		 * effective and real uids of the target.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
			if (proc_task(p) != TASK_NULL) {
				struct proc_ident pident = proc_ident(p);

				task_t task = proc_task(p);

				/* Take a task ref and drop the proc ref before the MAC hook. */
				task_reference(task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* Conversion consumes the task reference taken above. */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret  = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Denied or no such process: report MACH_PORT_NULL and failure. */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1148 
1149 /*
1150  *	Routine:	task_inspect_for_pid
1151  *	Purpose:
1152  *		Get the task inspect port for another "process", named by its
1153  *		process ID on the same host as "target_task".
1154  */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;

	proc_t                  proc = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_inspect_t          task_insp = TASK_INSPECT_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	int                     error = 0;
	void                    *sright = NULL;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	/* Validate the caller-supplied task port; its ref is dropped at tifpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Snapshot identity now; the proc ref is dropped before any upcall. */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	/* Enforce the POSIX uid/gid-based task_for_pid policy. */
	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	task_insp = proc_task(proc);
	if (task_insp == TASK_INSPECT_NULL) {
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	/* MAC hook: may veto handing out an inspect-flavor task port. */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	/* Common exit: release every ref still held and report the result. */
	task_deallocate(t1);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1270 
1271 /*
1272  *	Routine:	task_read_for_pid
1273  *	Purpose:
1274  *		Get the task read port for another "process", named by its
1275  *		process ID on the same host as "target_task".
1276  */
1277 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1278 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1279 {
1280 	mach_port_name_t        target_tport = args->target_tport;
1281 	int                     pid = args->pid;
1282 	user_addr_t             task_addr = args->t;
1283 
1284 	proc_t                  proc = PROC_NULL;
1285 	task_t                  t1 = TASK_NULL;
1286 	task_read_t             task_read = TASK_READ_NULL;
1287 	mach_port_name_t        tret = MACH_PORT_NULL;
1288 	ipc_port_t              tfpport = MACH_PORT_NULL;
1289 	int                     error = 0;
1290 	void                    *sright = NULL;
1291 	boolean_t               is_current_proc = FALSE;
1292 	struct proc_ident       pident = {0};
1293 
1294 	/* Disallow read port for kernel_task */
1295 	if (pid == 0) {
1296 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1297 		return EPERM;
1298 	}
1299 
1300 	t1 = port_name_to_task(target_tport);
1301 	if (t1 == TASK_NULL) {
1302 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1303 		return EINVAL;
1304 	}
1305 
1306 	proc = proc_find(pid);
1307 	if (proc == PROC_NULL) {
1308 		error = ESRCH;
1309 		goto trfpout;
1310 	}
1311 	pident = proc_ident(proc);
1312 	is_current_proc = (proc == current_proc());
1313 
1314 	if (!(task_for_pid_posix_check(proc))) {
1315 		error = EPERM;
1316 		goto trfpout;
1317 	}
1318 
1319 	task_read = proc_task(proc);
1320 	if (task_read == TASK_INSPECT_NULL) {
1321 		goto trfpout;
1322 	}
1323 
1324 	/*
1325 	 * Grab a task reference and drop the proc reference before making any upcalls.
1326 	 */
1327 	task_reference(task_read);
1328 
1329 	proc_rele(proc);
1330 	proc = PROC_NULL;
1331 
1332 #if CONFIG_MACF
1333 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1334 	if (error) {
1335 		error = EPERM;
1336 		goto trfpout;
1337 	}
1338 #endif
1339 
1340 	/* If we aren't root and target's task access port is set... */
1341 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1342 	    !is_current_proc &&
1343 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
1344 	    (tfpport != IPC_PORT_NULL)) {
1345 		if (tfpport == IPC_PORT_DEAD) {
1346 			error = EACCES;
1347 			goto trfpout;
1348 		}
1349 
1350 
1351 		/* Call up to the task access server */
1352 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1353 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1354 
1355 		if (error != MACH_MSG_SUCCESS) {
1356 			if (error == MACH_RCV_INTERRUPTED) {
1357 				error = EINTR;
1358 			} else {
1359 				error = EPERM;
1360 			}
1361 			goto trfpout;
1362 		}
1363 	}
1364 
1365 	/* Check if the task has been corpsified */
1366 	if (is_corpsetask(task_read)) {
1367 		error = EACCES;
1368 		goto trfpout;
1369 	}
1370 
1371 	/* could be IP_NULL, consumes a ref */
1372 	sright = (void*) convert_task_read_to_port(task_read);
1373 	task_read = TASK_READ_NULL;
1374 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1375 
1376 trfpout:
1377 	task_deallocate(t1);
1378 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1379 	if (proc != PROC_NULL) {
1380 		proc_rele(proc);
1381 	}
1382 	if (tfpport != IPC_PORT_NULL) {
1383 		ipc_port_release_send(tfpport);
1384 	}
1385 	if (task_read != TASK_READ_NULL) {
1386 		task_deallocate(task_read);
1387 	}
1388 
1389 	*ret = error;
1390 	return error;
1391 }
1392 
/*
 * pid_suspend: suspend the task for args->pid via task_pidsuspend().
 * Result is errno-style (EPERM/ESRCH/EINVAL/EACCES/EINTR or 0) and is
 * also stored through *ret, despite the kern_return_t return type.
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never suspend the kernel (pid 0). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Allowed if the POSIX tfp policy passes, or the caller is entitled. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	/* Hold a task ref across the suspend; map KERN_* to errno values. */
	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Let memorystatus account for the newly suspended process. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1484 
/*
 * debug_control_port_for_pid: copy out the target task's debug control
 * port for args->pid. Holders of DEBUG_PORT_ENTITLEMENT bypass the MACF
 * and task-access-server checks.
 */
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	ipc_port_t              sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Validate the caller-supplied task port; its ref is dropped at tfpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Snapshot identity now; the proc ref is dropped before any upcall. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* Enforce the POSIX uid/gid-based task_for_pid policy. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* No task (e.g. exiting process): report success with MACH_PORT_NULL. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	/* Entitled callers skip both the MAC check and the task-access upcall. */
	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	/* Move the send right into the caller's IPC space as a port name. */
	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: release every ref still held and report the result. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1619 
/*
 * pid_resume: resume a task previously suspended via pid_suspend/freeze
 * machinery, using task_pidresume(). Result is errno-style and is also
 * stored through *ret, despite the kern_return_t return type.
 */
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never operate on the kernel (pid 0). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Allowed if the POSIX tfp policy passes, or the caller is entitled. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* Undo the socket defunct state applied while the process was suspended. */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	/* Hold a task ref across the resume. */
	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	/* Map KERN_* results to errno values; KERN_MEMORY_ERROR kills the target. */
	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1723 
1724 #if !XNU_TARGET_OS_OSX
1725 /*
1726  * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1727  * When a process is specified, this call is blocking, otherwise we wake up the
1728  * freezer thread and do not block on a process being frozen.
1729  */
/*
 * pid_hibernate: args->pid selects the mode —
 *   pid >= 0 : freeze that process synchronously;
 *   pid == -1: trigger the freezer thread (non-blocking);
 *   pid == -2: page out anonymous pages.
 * Result is errno-style and is also stored through *ret.
 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int     error = 0;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	//Note that targetproc may be null
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/* Dispatch on the mode encoded in pid (see header comment). */
	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1786 #endif /* !XNU_TARGET_OS_OSX */
1787 
1788 #if SOCKETS
/*
 * networking_memstatus_callout: forward a memorystatus event to every
 * NECP and Skywalk-channel file descriptor owned by p, under the fd lock.
 * Always returns 1.
 */
int
networking_memstatus_callout(proc_t p, uint32_t status)
{
	struct fileproc *fp;

	/*
	 * proc list lock NOT held
	 * proc lock NOT held
	 * a reference on the proc has been held / shall be dropped by the caller.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);

	proc_fdlock(p);

	fdt_foreach(fp, p) {
		/* Only network-policy and channel descriptors are notified. */
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
#if NECP
		case DTYPE_NETPOLICY:
			necp_fd_memstatus(p, status,
			    (struct necp_fd_data *)fp_get_data(fp));
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			kern_channel_memstatus(p, status,
			    (struct kern_channel *)fp_get_data(fp));
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}
	proc_fdunlock(p);

	return 1;
}
1826 
1827 #if SKYWALK
1828 /*
1829  * Since we make multiple passes across the fileproc array, record the
1830  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases.  If we find more,
1832  * we'll go to the slow path during second pass.
1833  */
1834 #define MAX_CHANNELS    8       /* should be more than enough */
1835 #endif /* SKYWALK */
1836 
/*
 * networking_defunct_callout: proc_iterate() callback that defuncts the
 * sockets, NECP descriptors, and Skywalk channels matching the pid in
 * the pid_shutdown_sockets_args passed via arg. Channels are handled in
 * a second pass so NECP is defuncted first.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match owner, last-user, or delegated-effective pid. */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP).  Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* Slow path: more channels than the cache holds; re-walk the fds. */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1921 
1922 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1923 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1924 {
1925 	int                             error = 0;
1926 	proc_t                          targetproc = PROC_NULL;
1927 	int                             pid = args->pid;
1928 	int                             level = args->level;
1929 
1930 	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1931 	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1932 		error = EINVAL;
1933 		goto out;
1934 	}
1935 
1936 	targetproc = proc_find(pid);
1937 	if (targetproc == PROC_NULL) {
1938 		error = ESRCH;
1939 		goto out;
1940 	}
1941 
1942 	if (!task_for_pid_posix_check(targetproc) &&
1943 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1944 		error = EPERM;
1945 		goto out;
1946 	}
1947 
1948 #if CONFIG_MACF
1949 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1950 	if (error) {
1951 		error = EPERM;
1952 		goto out;
1953 	}
1954 #endif
1955 
1956 	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1957 	    networking_defunct_callout, args, NULL, NULL);
1958 
1959 out:
1960 	if (targetproc != PROC_NULL) {
1961 		proc_rele(targetproc);
1962 	}
1963 	*ret = error;
1964 	return error;
1965 }
1966 
1967 #endif /* SOCKETS */
1968 
1969 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1970 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1971     __unused int arg2, struct sysctl_req *req)
1972 {
1973 	int error = 0;
1974 	int new_value;
1975 
1976 	error = SYSCTL_OUT(req, arg1, sizeof(int));
1977 	if (error || req->newptr == USER_ADDR_NULL) {
1978 		return error;
1979 	}
1980 
1981 	if (!kauth_cred_issuser(kauth_cred_get())) {
1982 		return EPERM;
1983 	}
1984 
1985 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1986 		goto out;
1987 	}
1988 	if ((new_value == KERN_TFP_POLICY_DENY)
1989 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
1990 		tfp_policy = new_value;
1991 	} else {
1992 		error = EINVAL;
1993 	}
1994 out:
1995 	return error;
1996 }
1997 
/* Compile-time flag exported as kern.secure_kernel (1 on SECURE_KERNEL builds). */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy knob, guarded by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: shared-region tracing, version (read-only), and persistence knobs. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2016 
2017 /*
2018  * shared_region_check_np:
2019  *
2020  * This system call is intended for dyld.
2021  *
2022  * dyld calls this when any process starts to see if the process's shared
2023  * region is already set up and ready to use.
2024  * This call returns the base address of the first mapping in the
2025  * process's shared region's first mapping.
2026  * dyld will then check what's mapped at that address.
2027  *
2028  * If the shared region is empty, dyld will then attempt to map the shared
2029  * cache file in the shared region via the shared_region_map_np() system call.
2030  *
2031  * If something's already mapped in the shared region, dyld will check if it
2032  * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2034  * shared region.
2035  * If it doesn't match, dyld will unmap the shared region and map the shared
2036  * cache into the process's address space via mmap().
2037  *
2038  * A NULL pointer argument can be used by dyld to indicate it has unmapped
2039  * the shared region. We will remove the shared_region reference from the task.
2040  *
2041  * ERROR VALUES
2042  * EINVAL	no shared region
2043  * ENOMEM	shared region is empty
2044  * EFAULT	bad address for "start_address"
2045  */
int
shared_region_check_np(
	__unused struct proc                    *p,
	struct shared_region_check_np_args      *uap,
	__unused int                            *retvalp)
{
	vm_shared_region_t      shared_region;
	mach_vm_offset_t        start_address = 0;
	int                     error = 0;
	kern_return_t           kr;
	task_t                  task = current_task();

	/*
	 * Note: "p" is tagged __unused but is in fact referenced below for
	 * tracing and for the p_disallow_map_with_linking flag.
	 */
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process.  The sentinel is truncated to
	 * 32 bits for 32-bit address-space tasks so the comparison matches
	 * what such a task can actually pass in.
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/*
	 * Retrieve the current task's shared region.  This takes a reference
	 * that must be dropped via vm_shared_region_deallocate() below.
	 */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					/* uap->start_address doubles as the user buffer address */
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		/* drop the reference taken by vm_shared_region_get() above */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2144 
2145 
2146 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2147 shared_region_copyin(
2148 	struct proc  *p,
2149 	user_addr_t  user_addr,
2150 	unsigned int count,
2151 	unsigned int element_size,
2152 	void         *kernel_data)
2153 {
2154 	int             error = 0;
2155 	vm_size_t       size = count * element_size;
2156 
2157 	error = copyin(user_addr, kernel_data, size);
2158 	if (error) {
2159 		SHARED_REGION_TRACE_ERROR(
2160 			("shared_region: %p [%d(%s)] map(): "
2161 			"copyin(0x%llx, %ld) failed (error=%d)\n",
2162 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2163 			proc_getpid(p), p->p_comm,
2164 			(uint64_t)user_addr, (long)size, error));
2165 	}
2166 	return error;
2167 }
2168 
2169 /*
2170  * A reasonable upper limit to prevent overflow of allocation/copyin.
2171  */
2172 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2173 
2174 /* forward declaration */
2175 __attribute__((noinline))
2176 static void shared_region_map_and_slide_cleanup(
2177 	struct proc              *p,
2178 	uint32_t                 files_count,
2179 	struct _sr_file_mappings *sr_file_mappings,
2180 	struct vm_shared_region  *shared_region);
2181 
2182 /*
2183  * Setup part of _shared_region_map_and_slide().
2184  * It had to be broken out of _shared_region_map_and_slide() to
2185  * prevent compiler inlining from blowing out the stack.
2186  */
/*
 * On success this returns with *sr_file_mappings pointing at a kalloc'ed
 * per-file array (each entry holding a file reference and an iocount'ed
 * vnode) and *shared_region_ptr holding a reference on the task's shared
 * region; both are released by shared_region_map_and_slide_cleanup().
 * On failure the cleanup is performed here and both out-parameters are
 * set to NULL.
 */
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings,
	struct _sr_file_mappings            **sr_file_mappings,
	struct vm_shared_region             **shared_region_ptr,
	struct vnode                        *rdir_vp)
{
	int                             error = 0;
	struct _sr_file_mappings        *srfmp;
	uint32_t                        mappings_next;
	struct vnode_attr               va;
	off_t                           fs;
#if CONFIG_MACF
	vm_prot_t                       maxprot = VM_PROT_ALL;
#endif
	uint32_t                        i;
	struct vm_shared_region         *shared_region = NULL;
	boolean_t                       is_driverkit = task_is_driver(current_task());

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	/* reject out-of-range file counts before allocating anything */
	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
		error = E2BIG;
		goto done;
	}
	if (files_count == 0) {
		error = EINVAL;
		goto done;
	}
	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
	    Z_WAITOK | Z_ZERO);
	if (*sr_file_mappings == NULL) {
		error = ENOMEM;
		goto done;
	}
	/* carve the flat "mappings" array into one sub-range per file */
	mappings_next = 0;
	for (i = 0; i < files_count; i++) {
		srfmp = &(*sr_file_mappings)[i];
		srfmp->fd = files[i].sf_fd;
		srfmp->mappings_count = files[i].sf_mappings_count;
		srfmp->mappings = &mappings[mappings_next];
		/*
		 * NOTE(review): this accumulation is 32-bit and could in
		 * principle wrap for a huge user-supplied sf_mappings_count;
		 * the only current caller already bounds every mapping index
		 * against mappings_count before calling here — confirm if new
		 * callers are added.
		 */
		mappings_next += srfmp->mappings_count;
		if (mappings_next > mappings_count) {
			error = EINVAL;
			goto done;
		}
		srfmp->slide = files[i].sf_slide;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_trim_and_get(current_task());
	*shared_region_ptr = shared_region;
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"no shared region\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		error = EINVAL;
		goto done;
	}

	/*
	 * Check the shared region matches the current root
	 * directory of this process.  Deny the mapping to
	 * avoid tainting the shared region with something that
	 * doesn't quite belong into it.
	 */
	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: map(%p) root_dir mismatch\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread())));
		error = EPERM;
		goto done;
	}


	/* validate each file and take the references needed to map it */
	for (srfmp = &(*sr_file_mappings)[0];
	    srfmp < &(*sr_file_mappings)[files_count];
	    srfmp++) {
		if (srfmp->mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * A file descriptor of -1 is used to indicate that the data
		 * to be put in the shared region for this mapping comes directly
		 * from the processes address space. Ensure we have proper alignments.
		 */
		if (srfmp->fd == -1) {
			/* only allow one mapping per fd */
			if (srfmp->mappings_count > 1) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data >1 mapping\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				error = EINVAL;
				goto done;
			}

			/*
			 * The destination address and size must be page aligned.
			 */
			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
			mach_vm_address_t dest_addr = mapping->sms_address;
			mach_vm_size_t    map_size = mapping->sms_size;
			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, dest_addr));
				error = EINVAL;
				goto done;
			}
			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, map_size));
				error = EINVAL;
				goto done;
			}
			/* anonymous data mapping: no file to validate below */
			continue;
		}

		/* get file structure from file descriptor */
		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d lookup failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}

		/* we need at least read permission on the file */
		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d not readable\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd));
			error = EPERM;
			goto done;
		}

		/* get vnode from file structure */
		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d getwithref failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}
		/* iocount taken above; released in shared_region_map_and_slide_cleanup() */
		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);

		/* make sure the vnode is a regular file */
		if (srfmp->vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not a file (type=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, srfmp->vp->v_type));
			error = EINVAL;
			goto done;
		}

#if CONFIG_MACF
		/* pass in 0 for the offset argument because AMFI does not need the offset
		 *       of the shared cache */
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
		if (error) {
			goto done;
		}
#endif /* MAC */

#if XNU_TARGET_OS_OSX && defined(__arm64__)
		/*
		 * Check if the shared cache is in the trust cache;
		 * if so, we can skip the root ownership check.
		 */
#if DEVELOPMENT || DEBUG
		/*
		 * Skip both root ownership and trust cache check if
		 * enforcement is disabled.
		 */
		if (!cs_system_enforcement()) {
			goto after_root_check;
		}
#endif /* DEVELOPMENT || DEBUG */
		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
		if (blob == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing CS blob\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		const uint8_t *cdhash = csblob_get_cdhash(blob);
		if (cdhash == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing cdhash\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}

		/* only these first-party trust-cache types bypass the root check */
		bool in_trust_cache = false;
		TrustCacheQueryToken_t qt;
		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
			TCType_t tc_type = kTCTypeInvalid;
			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
			    (tc_type == kTCTypeCryptex1BootOS ||
			    tc_type == kTCTypeStatic ||
			    tc_type == kTCTypeEngineering));
		}
		if (!in_trust_cache) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not in trust cache\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

		/* The shared cache file must be owned by root */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_getattr(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				error));
			goto done;
		}
		if (va.va_uid != 0) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"owned by uid=%d instead of 0\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, va.va_uid));
			error = EPERM;
			goto done;
		}

#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_flags);
			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"vnode_getattr(%p) failed (error=%d)\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					error));
				goto done;
			}

			if (!(va.va_flags & SF_RESTRICTED)) {
				/*
				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
				 * the shared cache file is NOT SIP-protected, so reject the
				 * mapping request
				 */
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'), "
					"vnode is not SIP-protected. \n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				goto done;
			}
		}
#else /* CONFIG_CSR */

		/*
		 * Devices without SIP/ROSP need to make sure that the shared cache
		 * is either on the root volume or in the preboot cryptex volume.
		 */
		assert(rdir_vp != NULL);
		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
			vnode_t preboot_vp = NULL;
#if XNU_TARGET_OS_OSX
#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
#else
#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
#endif
			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
			/* short-circuit: preboot_vp is dereferenced only when the lookup succeeded */
			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"not on process' root volume nor preboot volume\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				if (preboot_vp) {
					(void)vnode_put(preboot_vp);
				}
				goto done;
			} else if (preboot_vp) {
				(void)vnode_put(preboot_vp);
			}
		}
#endif /* CONFIG_CSR */

		if (scdir_enforce) {
			/*
			 * NOTE(review): this initializer is immediately
			 * overwritten by the for-loop init expression below.
			 */
			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			struct vnode *scdir_vp = NULL;
			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			    *expected_scdir_path != NULL;
			    expected_scdir_path++) {
				/* get vnode for expected_scdir_path */
				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
				if (error) {
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)]: "
						"vnode_lookup(%s) failed (error=%d)\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						*expected_scdir_path, error));
					continue;
				}

				/* check if parent is scdir_vp */
				assert(scdir_vp != NULL);
				if (vnode_parent(srfmp->vp) == scdir_vp) {
					(void)vnode_put(scdir_vp);
					scdir_vp = NULL;
					goto scdir_ok;
				}
				(void)vnode_put(scdir_vp);
				scdir_vp = NULL;
			}
			/* nothing matches */
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"shared cache file not in expected directory\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EPERM;
			goto done;
		}
scdir_ok:

		/* get vnode size */
		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_size(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
			goto done;
		}
		srfmp->file_size = fs;

		/* get the file's memory object handle */
		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"no memory object\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EINVAL;
			goto done;
		}

		/* check that the mappings are properly covered by code signatures */
		if (!cs_system_enforcement()) {
			/* code signing is not enforced: no need to check */
		} else {
			for (i = 0; i < srfmp->mappings_count; i++) {
				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
					/* zero-filled mapping: not backed by the file */
					continue;
				}
				if (ubc_cs_is_range_codesigned(srfmp->vp,
				    srfmp->mappings[i].sms_file_offset,
				    srfmp->mappings[i].sms_size)) {
					/* this mapping is fully covered by code signatures */
					continue;
				}
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
					"is not code-signed\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					i, srfmp->mappings_count,
					srfmp->mappings[i].sms_address,
					srfmp->mappings[i].sms_size,
					srfmp->mappings[i].sms_file_offset,
					srfmp->mappings[i].sms_max_prot,
					srfmp->mappings[i].sms_init_prot));
				error = EINVAL;
				goto done;
			}
		}
	}
done:
	if (error != 0) {
		/* release everything acquired so far; out-params are reset to NULL */
		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
		*sr_file_mappings = NULL;
		*shared_region_ptr = NULL;
	}
	return error;
}
2653 
2654 /*
 * _shared_region_map_and_slide()
2656  *
2657  * This system call is intended for dyld.
2658  *
2659  * dyld uses this to map a shared cache file into a shared region.
2660  * This is usually done only the first time a shared cache is needed.
2661  * Subsequent processes will just use the populated shared region without
2662  * requiring any further setup.
2663  */
/*
 * Map the described files into the current task's shared region.
 * Returns 0 or an errno value suitable for returning to user space.
 */
static int
_shared_region_map_and_slide(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int                             error = 0;
	kern_return_t                   kr = KERN_SUCCESS;
	struct _sr_file_mappings        *sr_file_mappings = NULL;
	struct vnode                    *rdir_vp = NULL;
	struct vm_shared_region         *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		/* not chroot'ed: use the system root */
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 * On failure, setup has already released everything it acquired.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	/* release the root-dir reference and everything setup acquired */
	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
2749 
2750 /*
2751  * Clean up part of _shared_region_map_and_slide()
2752  * It had to be broken out of _shared_region_map_and_slide() to
2753  * prevent compiler inlining from blowing out the stack.
2754  */
2755 __attribute__((noinline))
2756 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region)2757 shared_region_map_and_slide_cleanup(
2758 	struct proc              *p,
2759 	uint32_t                 files_count,
2760 	struct _sr_file_mappings *sr_file_mappings,
2761 	struct vm_shared_region  *shared_region)
2762 {
2763 	struct _sr_file_mappings *srfmp;
2764 	struct vnode_attr        va;
2765 
2766 	if (sr_file_mappings != NULL) {
2767 		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2768 			if (srfmp->vp != NULL) {
2769 				vnode_lock_spin(srfmp->vp);
2770 				srfmp->vp->v_flag |= VSHARED_DYLD;
2771 				vnode_unlock(srfmp->vp);
2772 
2773 				/* update the vnode's access time */
2774 				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2775 					VATTR_INIT(&va);
2776 					nanotime(&va.va_access_time);
2777 					VATTR_SET_ACTIVE(&va, va_access_time);
2778 					vnode_setattr(srfmp->vp, &va, vfs_context_current());
2779 				}
2780 
2781 #if NAMEDSTREAMS
2782 				/*
2783 				 * If the shared cache is compressed, it may
2784 				 * have a namedstream vnode instantiated for
2785 				 * for it. That namedstream vnode will also
2786 				 * have to be marked with VSHARED_DYLD.
2787 				 */
2788 				if (vnode_hasnamedstreams(srfmp->vp)) {
2789 					vnode_t svp;
2790 					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2791 					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
2792 						vnode_lock_spin(svp);
2793 						svp->v_flag |= VSHARED_DYLD;
2794 						vnode_unlock(svp);
2795 						vnode_put(svp);
2796 					}
2797 				}
2798 #endif /* NAMEDSTREAMS */
2799 				/*
2800 				 * release the vnode...
2801 				 * ubc_map() still holds it for us in the non-error case
2802 				 */
2803 				(void) vnode_put(srfmp->vp);
2804 				srfmp->vp = NULL;
2805 			}
2806 			if (srfmp->fp != NULL) {
2807 				/* release the file descriptor */
2808 				fp_drop(p, srfmp->fd, srfmp->fp, 0);
2809 				srfmp->fp = NULL;
2810 			}
2811 		}
2812 		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2813 	}
2814 
2815 	if (shared_region != NULL) {
2816 		vm_shared_region_deallocate(shared_region);
2817 	}
2818 }
2819 
2820 
2821 /*
2822  * For each file mapped, we may have mappings for:
2823  *    TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2824  * so let's round up to 8 mappings per file.
2825  */
2826 #define SFM_MAX       (_SR_FILE_MAPPINGS_MAX_FILES * 8)     /* max mapping structs allowed to pass in */
2827 
2828 /*
2829  * This is the new interface for setting up shared region mappings.
2830  *
2831  * The slide used for shared regions setup using this interface is done differently
2832  * from the old interface. The slide value passed in the shared_files_np represents
2833  * a max value. The kernel will choose a random value based on that, then use it
2834  * for all shared regions.
2835  */
2836 #if defined (__x86_64__)
2837 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2838 #else
2839 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2840 #endif
2841 
int
shared_region_map_and_slide_2_np(
	struct proc                                  *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int                                 *retvalp)
{
	unsigned int                  files_count;
	struct shared_file_np         *shared_files = NULL;
	unsigned int                  mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	/*
	 * NOTE(review): "kr" holds a mix of errno values (from
	 * shared_region_copyin() / _shared_region_map_and_slide()) and raw
	 * kern_return_t constants (KERN_RESOURCE_SHORTAGE, KERN_FAILURE) and
	 * is returned to user space as-is — confirm this is the intended
	 * userspace contract before changing any of the constants below.
	 */
	kern_return_t                 kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* bound-check and allocate the kernel copy of the file descriptions */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	/* bound-check and allocate the kernel copy of the mapping descriptions */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/* only the first file's sf_slide is consulted as the slide maximum */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		/* pick a random, page-mask-aligned slide below the requested max */
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			/* every mapping index is bounds-checked against mappings_count */
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* kfree_data() tolerates NULL pointers from the early-exit paths */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2965 
2966 /*
2967  * A syscall for dyld to use to map data pages that need load time relocation fixups.
2968  * The fixups are performed by a custom pager during page-in, so the pages still appear
2969  * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
2970  * on demand later, all w/o using the compressor.
2971  *
 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
2973  * running, they are COW'd as normal.
2974  */
2975 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)2976 map_with_linking_np(
2977 	struct proc                     *p,
2978 	struct map_with_linking_np_args *uap,
2979 	__unused int                    *retvalp)
2980 {
2981 	uint32_t                        region_count;
2982 	uint32_t                        r;
2983 	struct mwl_region               *regions = NULL;
2984 	struct mwl_region               *rp;
2985 	uint32_t                        link_info_size;
2986 	void                            *link_info = NULL;      /* starts with a struct mwl_info_hdr */
2987 	struct mwl_info_hdr             *info_hdr = NULL;
2988 	uint64_t                        binds_size;
2989 	int                             fd;
2990 	struct fileproc                 *fp = NULL;
2991 	struct vnode                    *vp = NULL;
2992 	size_t                          file_size;
2993 	off_t                           fs;
2994 	struct vnode_attr               va;
2995 	memory_object_control_t         file_control = NULL;
2996 	int                             error;
2997 	kern_return_t                   kr = KERN_SUCCESS;
2998 
2999 	/*
3000 	 * Check if dyld has told us it finished with this call.
3001 	 */
3002 	if (p->p_disallow_map_with_linking) {
3003 		printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3004 		    __func__, proc_getpid(p), p->p_comm);
3005 		kr = KERN_FAILURE;
3006 		goto done;
3007 	}
3008 
3009 	/*
3010 	 * First we do some sanity checking on what dyld has passed us.
3011 	 */
3012 	region_count = uap->region_count;
3013 	link_info_size = uap->link_info_size;
3014 	if (region_count == 0) {
3015 		printf("%s: [%d(%s)]: region_count == 0\n",
3016 		    __func__, proc_getpid(p), p->p_comm);
3017 		kr = KERN_FAILURE;
3018 		goto done;
3019 	}
3020 	if (region_count > MWL_MAX_REGION_COUNT) {
3021 		printf("%s: [%d(%s)]: region_count too big %d\n",
3022 		    __func__, proc_getpid(p), p->p_comm, region_count);
3023 		kr = KERN_FAILURE;
3024 		goto done;
3025 	}
3026 
3027 	if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3028 		printf("%s: [%d(%s)]: link_info_size too small\n",
3029 		    __func__, proc_getpid(p), p->p_comm);
3030 		kr = KERN_FAILURE;
3031 		goto done;
3032 	}
3033 	if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3034 		printf("%s: [%d(%s)]: link_info_size too big %d\n",
3035 		    __func__, proc_getpid(p), p->p_comm, link_info_size);
3036 		kr = KERN_FAILURE;
3037 		goto done;
3038 	}
3039 
3040 	/*
3041 	 * Allocate and copyin the regions and link info
3042 	 */
3043 	regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3044 	if (regions == NULL) {
3045 		printf("%s: [%d(%s)]: failed to allocate regions\n",
3046 		    __func__, proc_getpid(p), p->p_comm);
3047 		kr = KERN_RESOURCE_SHORTAGE;
3048 		goto done;
3049 	}
3050 	kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3051 	if (kr != KERN_SUCCESS) {
3052 		printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3053 		    __func__, proc_getpid(p), p->p_comm, kr);
3054 		goto done;
3055 	}
3056 
3057 	link_info = kalloc_data(link_info_size, Z_WAITOK);
3058 	if (link_info == NULL) {
3059 		printf("%s: [%d(%s)]: failed to allocate link_info\n",
3060 		    __func__, proc_getpid(p), p->p_comm);
3061 		kr = KERN_RESOURCE_SHORTAGE;
3062 		goto done;
3063 	}
3064 	kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3065 	if (kr != KERN_SUCCESS) {
3066 		printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3067 		    __func__, proc_getpid(p), p->p_comm, kr);
3068 		goto done;
3069 	}
3070 
3071 	/*
3072 	 * Do some verification the data structures.
3073 	 */
3074 	info_hdr = (struct mwl_info_hdr *)link_info;
3075 	if (info_hdr->mwli_version != MWL_INFO_VERS) {
3076 		printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3077 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3078 		kr = KERN_FAILURE;
3079 		goto done;
3080 	}
3081 
3082 	if (info_hdr->mwli_binds_offset > link_info_size) {
3083 		printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3084 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3085 		kr = KERN_FAILURE;
3086 		goto done;
3087 	}
3088 
3089 	/* some older devs have s/w page size > h/w page size, no need to support them */
3090 	if (info_hdr->mwli_page_size != PAGE_SIZE) {
3091 		/* no printf, since this is expected on some devices */
3092 		kr = KERN_INVALID_ARGUMENT;
3093 		goto done;
3094 	}
3095 
3096 	binds_size = (uint64_t)info_hdr->mwli_binds_count *
3097 	    ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3098 	if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3099 		printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3100 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3101 		kr = KERN_FAILURE;
3102 		goto done;
3103 	}
3104 
3105 	if (info_hdr->mwli_chains_offset > link_info_size) {
3106 		printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3107 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3108 		kr = KERN_FAILURE;
3109 		goto done;
3110 	}
3111 
3112 
3113 	/*
3114 	 * Ensure the chained starts in the link info and make sure the
3115 	 * segment info offsets are within bounds.
3116 	 */
3117 	if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3118 		printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3119 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3120 		kr = KERN_FAILURE;
3121 		goto done;
3122 	}
3123 	if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3124 		printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3125 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3126 		kr = KERN_FAILURE;
3127 		goto done;
3128 	}
3129 
3130 	/* Note that more verification of offsets is done in the pager itself */
3131 
3132 	/*
3133 	 * Ensure we've only been given one FD and verify valid protections.
3134 	 */
3135 	fd = regions[0].mwlr_fd;
3136 	for (r = 0; r < region_count; ++r) {
3137 		if (regions[r].mwlr_fd != fd) {
3138 			printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3139 			    __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3140 			kr = KERN_FAILURE;
3141 			goto done;
3142 		}
3143 		regions[r].mwlr_protections &= VM_PROT_ALL;
3144 		if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3145 			printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3146 			    __func__, proc_getpid(p), p->p_comm);
3147 			kr = KERN_FAILURE;
3148 			goto done;
3149 		}
3150 	}
3151 
3152 
3153 	/* get file structure from file descriptor */
3154 	error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3155 	if (error) {
3156 		printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3157 		    __func__, proc_getpid(p), p->p_comm, error);
3158 		kr = KERN_FAILURE;
3159 		goto done;
3160 	}
3161 
3162 	/* We need at least read permission on the file */
3163 	if (!(fp->fp_glob->fg_flag & FREAD)) {
3164 		printf("%s: [%d(%s)]: not readable\n",
3165 		    __func__, proc_getpid(p), p->p_comm);
3166 		kr = KERN_FAILURE;
3167 		goto done;
3168 	}
3169 
3170 	/* Get the vnode from file structure */
3171 	vp = (struct vnode *)fp_get_data(fp);
3172 	error = vnode_getwithref(vp);
3173 	if (error) {
3174 		printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3175 		    __func__, proc_getpid(p), p->p_comm, error);
3176 		kr = KERN_FAILURE;
3177 		vp = NULL; /* just to be sure */
3178 		goto done;
3179 	}
3180 
3181 	/* Make sure the vnode is a regular file */
3182 	if (vp->v_type != VREG) {
3183 		printf("%s: [%d(%s)]: vnode not VREG\n",
3184 		    __func__, proc_getpid(p), p->p_comm);
3185 		kr = KERN_FAILURE;
3186 		goto done;
3187 	}
3188 
3189 	/* get vnode size */
3190 	error = vnode_size(vp, &fs, vfs_context_current());
3191 	if (error) {
3192 		goto done;
3193 	}
3194 	file_size = fs;
3195 
3196 	/* get the file's memory object handle */
3197 	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3198 	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3199 		printf("%s: [%d(%s)]: no memory object\n",
3200 		    __func__, proc_getpid(p), p->p_comm);
3201 		kr = KERN_FAILURE;
3202 		goto done;
3203 	}
3204 
3205 	for (r = 0; r < region_count; ++r) {
3206 		rp = &regions[r];
3207 
3208 		/*
3209 		 * Only allow data mappings and not zero fill.
3210 		 */
3211 		if (rp->mwlr_protections & VM_PROT_ZF) {
3212 			printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF\n",
3213 			    __func__, proc_getpid(p), p->p_comm, r);
3214 			kr = KERN_FAILURE;
3215 			goto done;
3216 		}
3217 		if (rp->mwlr_protections & VM_PROT_EXECUTE) {
3218 			printf("%s: [%d(%s)]: region %d, found VM_PROT_EXECUTE\n",
3219 			    __func__, proc_getpid(p), p->p_comm, r);
3220 			kr = KERN_FAILURE;
3221 			goto done;
3222 		}
3223 
3224 #if CONFIG_MACF
3225 		vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3226 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3227 		    fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3228 		if (error) {
3229 			printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3230 			    __func__, proc_getpid(p), p->p_comm, r, error);
3231 			kr = KERN_FAILURE;
3232 			goto done;
3233 		}
3234 #endif /* MAC */
3235 
3236 		/* check that the mappings are properly covered by code signatures */
3237 		if (cs_system_enforcement()) {
3238 			if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3239 				printf("%s: [%d(%s)]: region %d, not code signed\n",
3240 				    __func__, proc_getpid(p), p->p_comm, r);
3241 				kr = KERN_FAILURE;
3242 				goto done;
3243 			}
3244 		}
3245 	}
3246 
3247 	/* update the vnode's access time */
3248 	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3249 		VATTR_INIT(&va);
3250 		nanotime(&va.va_access_time);
3251 		VATTR_SET_ACTIVE(&va, va_access_time);
3252 		vnode_setattr(vp, &va, vfs_context_current());
3253 	}
3254 
3255 	/* get the VM to do the work */
3256 	kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3257 
3258 done:
3259 	if (fp != NULL) {
3260 		/* release the file descriptor */
3261 		fp_drop(p, fd, fp, 0);
3262 	}
3263 	if (vp != NULL) {
3264 		(void)vnode_put(vp);
3265 	}
3266 	if (regions != NULL) {
3267 		kfree_data(regions, region_count * sizeof(regions[0]));
3268 	}
3269 	/* link info is used in the pager if things worked */
3270 	if (link_info != NULL && kr != KERN_SUCCESS) {
3271 		kfree_data(link_info, link_info_size);
3272 	}
3273 
3274 	switch (kr) {
3275 	case KERN_SUCCESS:
3276 		return 0;
3277 	case KERN_RESOURCE_SHORTAGE:
3278 		return ENOMEM;
3279 	default:
3280 		return EINVAL;
3281 	}
3282 }
3283 
#if DEBUG || DEVELOPMENT
/* Live dyld pager count and its high-water mark (see map_with_linking_np()). */
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

/* vm.pagesize: the software VM page size, in bytes. */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 *       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 *       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int     vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

/* vm.memory_pressure: pressure indicator maintained in vm_pageout_state. */
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3305 
3306 static int
3307 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3308 {
3309 #pragma unused(oidp, arg1, arg2)
3310 	unsigned int page_free_wanted;
3311 
3312 	page_free_wanted = mach_vm_ctl_page_free_wanted();
3313 	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3314 }
/* vm.page_free_wanted: value computed on each read by vm_ctl_page_free_wanted(). */
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");
3318 
/* Purgeable-memory and kernel large-page counters. */
extern unsigned int     vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int     vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");
3330 
#if DEVELOPMENT || DEBUG
/* Pageout diagnostics exposed only on DEVELOPMENT/DEBUG kernels. */
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");         /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, "");         /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */
3365 
/* madvise(MADV_FREE*) debug switch and reusable-page accounting counters. */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3402 
3403 
/* Page queue sizes and pageout counters. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

/* Shared-cache and realtime-thread page protection/reclaim counters. */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3439 
#if defined (__x86_64__)
/* x86_64 free-page "clump" allocator tunable and per-clump-size statistics. */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif  /* if DEVELOPMENT || DEBUG */
#endif  /* #if defined (__x86_64__) */
3468 
#if CONFIG_SECLUDED_MEMORY

/* Secluded-memory pool sizes and grab outcome counters. */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3495 
3496 #include <kern/thread.h>
3497 #include <sys/user.h>
3498 
3499 void vm_pageout_io_throttle(void);
3500 
3501 void
vm_pageout_io_throttle(void)3502 vm_pageout_io_throttle(void)
3503 {
3504 	struct uthread *uthread = current_uthread();
3505 
3506 	/*
3507 	 * thread is marked as a low priority I/O type
3508 	 * and the I/O we issued while in this cleaning operation
3509 	 * collided with normal I/O operations... we'll
3510 	 * delay in order to mitigate the impact of this
3511 	 * task on the normal operation of the system
3512 	 */
3513 
3514 	if (uthread->uu_lowpri_window) {
3515 		throttle_lowpri_io(1);
3516 	}
3517 }
3518 
3519 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3520 vm_pressure_monitor(
3521 	__unused struct proc *p,
3522 	struct vm_pressure_monitor_args *uap,
3523 	int *retval)
3524 {
3525 	kern_return_t   kr;
3526 	uint32_t        pages_reclaimed;
3527 	uint32_t        pages_wanted;
3528 
3529 	kr = mach_vm_pressure_monitor(
3530 		(boolean_t) uap->wait_for_pressure,
3531 		uap->nsecs_monitored,
3532 		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3533 		&pages_wanted);
3534 
3535 	switch (kr) {
3536 	case KERN_SUCCESS:
3537 		break;
3538 	case KERN_ABORTED:
3539 		return EINTR;
3540 	default:
3541 		return EINVAL;
3542 	}
3543 
3544 	if (uap->pages_reclaimed) {
3545 		if (copyout((void *)&pages_reclaimed,
3546 		    uap->pages_reclaimed,
3547 		    sizeof(pages_reclaimed)) != 0) {
3548 			return EFAULT;
3549 		}
3550 	}
3551 
3552 	*retval = (int) pages_wanted;
3553 	return 0;
3554 }
3555 
3556 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3557 kas_info(struct proc *p,
3558     struct kas_info_args *uap,
3559     int *retval __unused)
3560 {
3561 #ifndef CONFIG_KAS_INFO
3562 	(void)p;
3563 	(void)uap;
3564 	return ENOTSUP;
3565 #else /* CONFIG_KAS_INFO */
3566 	int                     selector = uap->selector;
3567 	user_addr_t     valuep = uap->value;
3568 	user_addr_t     sizep = uap->size;
3569 	user_size_t size, rsize;
3570 	int                     error;
3571 
3572 	if (!kauth_cred_issuser(kauth_cred_get())) {
3573 		return EPERM;
3574 	}
3575 
3576 #if CONFIG_MACF
3577 	error = mac_system_check_kas_info(kauth_cred_get(), selector);
3578 	if (error) {
3579 		return error;
3580 	}
3581 #endif
3582 
3583 	if (IS_64BIT_PROCESS(p)) {
3584 		user64_size_t size64;
3585 		error = copyin(sizep, &size64, sizeof(size64));
3586 		size = (user_size_t)size64;
3587 	} else {
3588 		user32_size_t size32;
3589 		error = copyin(sizep, &size32, sizeof(size32));
3590 		size = (user_size_t)size32;
3591 	}
3592 	if (error) {
3593 		return error;
3594 	}
3595 
3596 	switch (selector) {
3597 	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3598 	{
3599 		uint64_t slide = vm_kernel_slide;
3600 
3601 		if (sizeof(slide) != size) {
3602 			return EINVAL;
3603 		}
3604 
3605 		error = copyout(&slide, valuep, sizeof(slide));
3606 		if (error) {
3607 			return error;
3608 		}
3609 		rsize = size;
3610 	}
3611 	break;
3612 	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3613 	{
3614 		uint32_t i;
3615 		kernel_mach_header_t *mh = &_mh_execute_header;
3616 		struct load_command *cmd;
3617 		cmd = (struct load_command*) &mh[1];
3618 		uint64_t *bases;
3619 		rsize = mh->ncmds * sizeof(uint64_t);
3620 
3621 		/*
3622 		 * Return the size if no data was passed
3623 		 */
3624 		if (valuep == 0) {
3625 			break;
3626 		}
3627 
3628 		if (rsize > size) {
3629 			return EINVAL;
3630 		}
3631 
3632 		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3633 
3634 		for (i = 0; i < mh->ncmds; i++) {
3635 			if (cmd->cmd == LC_SEGMENT_KERNEL) {
3636 				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3637 				bases[i] = (uint64_t)sg->vmaddr;
3638 			}
3639 			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3640 		}
3641 
3642 		error = copyout(bases, valuep, rsize);
3643 
3644 		kfree_data(bases, rsize);
3645 
3646 		if (error) {
3647 			return error;
3648 		}
3649 	}
3650 	break;
3651 	default:
3652 		return EINVAL;
3653 	}
3654 
3655 	if (IS_64BIT_PROCESS(p)) {
3656 		user64_size_t size64 = (user64_size_t)rsize;
3657 		error = copyout(&size64, sizep, sizeof(size64));
3658 	} else {
3659 		user32_size_t size32 = (user32_size_t)rsize;
3660 		error = copyout(&size32, sizep, sizeof(size32));
3661 	}
3662 
3663 	return error;
3664 #endif /* CONFIG_KAS_INFO */
3665 }
3666 
3667 #if __has_feature(ptrauth_calls)
3668 /*
3669  * Generate a random pointer signing key that isn't 0.
3670  */
3671 uint64_t
generate_jop_key(void)3672 generate_jop_key(void)
3673 {
3674 	uint64_t key;
3675 
3676 	do {
3677 		read_random(&key, sizeof key);
3678 	} while (key == 0);
3679 	return key;
3680 }
3681 #endif /* __has_feature(ptrauth_calls) */
3682 
3683 
3684 #pragma clang diagnostic push
3685 #pragma clang diagnostic ignored "-Wcast-qual"
3686 #pragma clang diagnostic ignored "-Wunused-function"
3687 
3688 static void
asserts()3689 asserts()
3690 {
3691 	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3692 	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3693 }
3694 
3695 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
3696 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
3697 #pragma clang diagnostic pop
3698 
/* Read-only VM page counters exported under the "vm" sysctl namespace. */
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3710 
#if __arm64__ && (DEVELOPMENT || DEBUG)
/* RW gate: whether footprint-suspend requests are honored (resumes always are). */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3716 static int
3717 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3718 {
3719 #pragma unused(oidp, arg1, arg2)
3720 	int error = 0;
3721 	int new_value;
3722 
3723 	if (req->newptr == USER_ADDR_NULL) {
3724 		return 0;
3725 	}
3726 	error = SYSCTL_IN(req, &new_value, sizeof(int));
3727 	if (error) {
3728 		return error;
3729 	}
3730 	if (!vm_footprint_suspend_allowed) {
3731 		if (new_value != 0) {
3732 			/* suspends are not allowed... */
3733 			return 0;
3734 		}
3735 		/* ... but let resumes proceed */
3736 	}
3737 	DTRACE_VM2(footprint_suspend,
3738 	    vm_map_t, current_map(),
3739 	    int, new_value);
3740 
3741 	pmap_footprint_suspend(current_map(), new_value);
3742 
3743 	return 0;
3744 }
/* Write-only, any-user entry point for the handler above. */
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3749 
/* Read-only corpse-footprint statistics. */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3765 
3766 
/* Shared-region pager statistics (read-only) and destroy-delay knob (RW). */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3782 
#if MACH_ASSERT
/* Tunable leeway before pmap ledger imbalances panic (assert builds only). */
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
3787 
3788 
/*
 * Read-only counters for the copy strategies taken by
 * vm_map_lookup_and_lock_object(): slow copy, strategic copy, and
 * shadow-object creation.
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3828 
/* Hardened-heap protection toggle (RW) and copy-on-read counter (RO). */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Shared-region instance counts (current and peak). */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* Pointer-auth builds additionally track per-key pagers and re-slides. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */
3861 
#if MACH_ASSERT
/* debug4k tunables (assert builds only). */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3872 
/* Read-only counters for VM map size/data limit settings and rejections. */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
3881 
/* Read-only counters for resilient-media fault handling. */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/* Error-injection knobs (rates RW, hit counters RO) for resilient media. */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3911 
3912 /*
3913  * A sysctl which causes all existing shared regions to become stale. They
3914  * will no longer be used by anything new and will be torn down as soon as
3915  * the last existing user exits. A write of non-zero value causes that to happen.
3916  * This should only be used by launchd, so we check that this is initproc.
3917  */
3918 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3919 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3920 {
3921 	unsigned int value = 0;
3922 	int changed = 0;
3923 	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3924 	if (error || !changed) {
3925 		return error;
3926 	}
3927 	if (current_proc() != initproc) {
3928 		return EPERM;
3929 	}
3930 
3931 	vm_shared_region_pivot();
3932 
3933 	return 0;
3934 }
3935 
/* Write-only entry point for the shared_region_pivot handler above. */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* Counters for shadow-object creation decisions. */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
3949 
3950 
#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self healing.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3957 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3958 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3959 {
3960 	uint64_t value = 0;
3961 	int error = sysctl_handle_quad(oidp, &value, 0, req);
3962 	if (error || !req->newptr) {
3963 		return error;
3964 	}
3965 
3966 	if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3967 		return 0;
3968 	} else {
3969 		return EINVAL;
3970 	}
3971 }
3972 
/* Write-only entry point for the corrupt_text_addr handler above. */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
3977 
3978 #if DEBUG || DEVELOPMENT
3979 #if CONFIG_MAP_RANGES
3980 static int
3981 vm_map_user_range_default SYSCTL_HANDLER_ARGS
3982 {
3983 #pragma unused(arg1, arg2, oidp)
3984 	struct mach_vm_range range;
3985 
3986 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
3987 	    != KERN_SUCCESS) {
3988 		return EINVAL;
3989 	}
3990 
3991 	return SYSCTL_OUT(req, &range, sizeof(range));
3992 }
3993 
3994 static int
3995 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
3996 {
3997 #pragma unused(arg1, arg2, oidp)
3998 	struct mach_vm_range range;
3999 
4000 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4001 	    != KERN_SUCCESS) {
4002 		return EINVAL;
4003 	}
4004 
4005 	return SYSCTL_OUT(req, &range, sizeof(range));
4006 }
4007 
/*
 * A sysctl that can be used to return ranges for the current VM map.
 * Used for testing VM ranges.
 */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");

#endif /* CONFIG_MAP_RANGES */
#endif /* DEBUG || DEVELOPMENT */
4019 
/* Compressor segment-fill contention statistics (read-only). */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/* Major-compaction reporting/yield knobs and statistics. */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4052