xref: /xnu-8796.121.2/bsd/vm/vm_unix.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Mach Operating System
30  * Copyright (c) 1987 Carnegie-Mellon University
31  * All rights reserved.  The CMU software License Agreement specifies
32  * the terms and conditions for use and redistribution.
33  */
34 /*
35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36  * support for mandatory and extensible security protections.  This notice
37  * is included in support of clause 2.2 (b) of the Apple Public License,
38  * Version 2.0.
39  */
40 #include <vm/vm_options.h>
41 
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57 
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #include <sys/code_signing.h>
86 #if NECP
87 #include <net/necp.h>
88 #endif /* NECP */
89 #if SKYWALK
90 #include <skywalk/os_channel.h>
91 #endif /* SKYWALK */
92 
93 #include <security/audit/audit.h>
94 #include <security/mac.h>
95 #include <bsm/audit_kevents.h>
96 
97 #include <kern/kalloc.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_kern.h>
100 #include <vm/vm_pageout.h>
101 
102 #include <mach/shared_region.h>
103 #include <vm/vm_shared_region.h>
104 
105 #include <vm/vm_dyld_pager.h>
106 
107 #include <vm/vm_protos.h>
108 
109 #include <sys/kern_memorystatus.h>
110 #include <sys/kern_memorystatus_freeze.h>
111 #include <sys/proc_internal.h>
112 
113 #include <mach-o/fixup-chains.h>
114 
115 #if CONFIG_MACF
116 #include <security/mac_framework.h>
117 #endif
118 
119 #include <kern/bits.h>
120 
121 #if CONFIG_CSR
122 #include <sys/csr.h>
123 #endif /* CONFIG_CSR */
124 #include <sys/trust_caches.h>
125 #include <libkern/amfi/amfi.h>
126 #include <IOKit/IOBSD.h>
127 
128 #if VM_MAP_DEBUG_APPLE_PROTECT
129 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
130 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
131 
132 #if VM_MAP_DEBUG_FOURK
133 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
134 #endif /* VM_MAP_DEBUG_FOURK */
135 
136 #if DEVELOPMENT || DEBUG
137 
138 static int
139 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
140 {
141 #pragma unused(arg1, arg2)
142 	vm_offset_t     kaddr;
143 	kern_return_t   kr;
144 	int     error = 0;
145 	int     size = 0;
146 
147 	error = sysctl_handle_int(oidp, &size, 0, req);
148 	if (error || !req->newptr) {
149 		return error;
150 	}
151 
152 	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
153 	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
154 
155 	if (kr == KERN_SUCCESS) {
156 		kmem_free(kernel_map, kaddr, size);
157 	}
158 
159 	return error;
160 }
161 
162 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
163     0, 0, &sysctl_kmem_alloc_contig, "I", "");
164 
165 extern int vm_region_footprint;
166 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
167 
168 static int
169 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
170 {
171 #pragma unused(arg1, arg2, oidp)
172 	kmem_gobj_stats stats = kmem_get_gobj_stats();
173 
174 	return SYSCTL_OUT(req, &stats, sizeof(stats));
175 }
176 
177 SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
178     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
179     0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
180 
181 #endif /* DEVELOPMENT || DEBUG */
182 
183 static int
184 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
185 {
186 #pragma unused(arg1, arg2, oidp)
187 	int     error = 0;
188 	int     value;
189 
190 	value = task_self_region_footprint();
191 	error = SYSCTL_OUT(req, &value, sizeof(int));
192 	if (error) {
193 		return error;
194 	}
195 
196 	if (!req->newptr) {
197 		return 0;
198 	}
199 
200 	error = SYSCTL_IN(req, &value, sizeof(int));
201 	if (error) {
202 		return error;
203 	}
204 	task_self_region_footprint_set(value);
205 	return 0;
206 }
207 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
208 
/*
 * vm.self_region_page_size: read and optionally override the page size used
 * when reporting vm_region information for the calling thread.
 * Accepted values: 0 (clear override), 4096, or 16384.
 */
static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int     error = 0;
	int     value;

	/* Report the thread's current region page size (derived from its shift). */
	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		/* Read-only query: done. */
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	/* Only "no override" (0), 4K, or 16K are meaningful. */
	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	/* Without mixed-page-size support, only the map's native size is valid. */
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	/*
	 * bit_first(value) converts the page size back into a shift.
	 * NOTE(review): for value == 0 this presumably yields the sentinel
	 * that clears the per-thread override — confirm bit_first(0)'s
	 * return value against thread_self_region_page_shift_set().
	 */
	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
244 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
245 
246 
247 #if DEVELOPMENT || DEBUG
248 extern int panic_on_unsigned_execute;
249 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
250 
251 extern int vm_log_xnu_user_debug;
252 SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
253 #endif /* DEVELOPMENT || DEBUG */
254 
255 extern int cs_executable_create_upl;
256 extern int cs_executable_wire;
257 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
258 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
259 
260 extern int apple_protect_pager_count;
261 extern int apple_protect_pager_count_mapped;
262 extern unsigned int apple_protect_pager_cache_limit;
263 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
264 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
265 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
266 
267 #if DEVELOPMENT || DEBUG
268 extern int radar_20146450;
269 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
270 
271 extern int macho_printf;
272 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
273 
274 extern int apple_protect_pager_data_request_debug;
275 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
276 
277 #if __arm64__
278 /* These are meant to support the page table accounting unit test. */
279 extern unsigned int arm_hardware_page_size;
280 extern unsigned int arm_pt_desc_size;
281 extern unsigned int arm_pt_root_size;
282 extern unsigned int inuse_user_tteroot_count;
283 extern unsigned int inuse_kernel_tteroot_count;
284 extern unsigned int inuse_user_ttepages_count;
285 extern unsigned int inuse_kernel_ttepages_count;
286 extern unsigned int inuse_user_ptepages_count;
287 extern unsigned int inuse_kernel_ptepages_count;
288 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
289 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
290 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
291 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
292 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
293 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
294 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
295 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
296 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
297 extern unsigned int free_page_size_tt_count;
298 extern unsigned int free_two_page_size_tt_count;
299 extern unsigned int free_tt_count;
300 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
301 SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
302 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
303 #if DEVELOPMENT || DEBUG
304 extern unsigned long pmap_asid_flushes;
305 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
306 extern unsigned long pmap_asid_hits;
307 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
308 extern unsigned long pmap_asid_misses;
309 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
310 #endif
311 #endif /* __arm64__ */
312 
313 #if __arm64__
314 extern int fourk_pager_data_request_debug;
315 SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
316 #endif /* __arm64__ */
317 #endif /* DEVELOPMENT || DEBUG */
318 
319 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
320 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
321 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
322 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
323 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
324 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
325 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
326 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
327 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
328 #if VM_SCAN_FOR_SHADOW_CHAIN
329 static int vm_shadow_max_enabled = 0;    /* Disabled by default */
330 extern int proc_shadow_max(void);
331 static int
332 vm_shadow_max SYSCTL_HANDLER_ARGS
333 {
334 #pragma unused(arg1, arg2, oidp)
335 	int value = 0;
336 
337 	if (vm_shadow_max_enabled) {
338 		value = proc_shadow_max();
339 	}
340 
341 	return SYSCTL_OUT(req, &value, sizeof(value));
342 }
343 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
344     0, 0, &vm_shadow_max, "I", "");
345 
346 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
347 
348 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
349 
350 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
351 
352 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
353 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
354 /*
355  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
356  */
357 
358 #if DEVELOPMENT || DEBUG
359 extern int allow_stack_exec, allow_data_exec;
360 
361 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
362 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
363 
364 #endif /* DEVELOPMENT || DEBUG */
365 
/*
 * Human-readable names for VM protection combinations, indexed by
 * (prot & VM_PROT_ALL): bit 0 = read, bit 1 = write, bit 2 = execute.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
376 
377 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)378 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
379 {
380 	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
381 	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
382 }
383 
384 /*
385  * shared_region_unnest_logging: level of logging of unnesting events
386  * 0	- no logging
387  * 1	- throttled logging of unexpected unnesting events (default)
388  * 2	- unthrottled logging of unexpected unnesting events
389  * 3+	- unthrottled logging of all unnesting events
390  */
391 int shared_region_unnest_logging = 1;
392 
393 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
394     &shared_region_unnest_logging, 0, "");
395 
396 int vm_shared_region_unnest_log_interval = 10;
397 int shared_region_unnest_log_count_threshold = 5;
398 
399 
#if XNU_TARGET_OS_OSX

/* Whether to enforce that the dyld shared cache is mapped from an approved directory. */
#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

/* Approved dyld shared cache directories (NULL-terminated list). */
static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
/* Approved dyld shared cache directories (NULL-terminated list). */
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Approved shared cache directories for DriverKit processes. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
437 
438 #ifndef SECURE_KERNEL
/*
 * Handler for vm.enforce_shared_cache_dir: forwards to the standard int
 * handler, but only when SIP's filesystem restrictions are lifted.
 * NOTE(review): the CSR check runs unconditionally, so with CSR enabled
 * this returns EPERM for reads as well as writes (the log message only
 * mentions "set") — confirm that is intended.
 */
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}
449 
450 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
451 #endif
452 
453 /* These log rate throttling state variables aren't thread safe, but
454  * are sufficient unto the task.
455  */
456 static int64_t last_unnest_log_time = 0;
457 static int shared_region_unnest_log_count = 0;
458 
/*
 * Log an unnesting of a shared-region range in a user map, subject to the
 * shared_region_unnest_logging level (0 = off, 1 = throttled unexpected
 * events, 2 = unthrottled unexpected events, 3+ = all events).
 * The throttle state is intentionally not thread safe (see comment above
 * last_unnest_log_time).
 */
void
log_unnest_badness(
	vm_map_t        m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t       is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval  tv;

	/* Level 0: logging disabled entirely. */
	if (shared_region_unnest_logging == 0) {
		return;
	}

	/* Levels 1-2 only report unexpected unnesting events. */
	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	/* Level 1: rate-limit to a count threshold per time interval. */
	if (shared_region_unnest_logging <= 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			/* Interval elapsed: open a fresh throttle window. */
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
503 
504 int
useracc(user_addr_t addr,user_size_t len,int prot)505 useracc(
506 	user_addr_t     addr,
507 	user_size_t     len,
508 	int     prot)
509 {
510 	vm_map_t        map;
511 
512 	map = current_map();
513 	return vm_map_check_protection(
514 		map,
515 		vm_map_trunc_page(addr,
516 		vm_map_page_mask(map)),
517 		vm_map_round_page(addr + len,
518 		vm_map_page_mask(map)),
519 		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
520 }
521 
522 int
vslock(user_addr_t addr,user_size_t len)523 vslock(
524 	user_addr_t     addr,
525 	user_size_t     len)
526 {
527 	kern_return_t   kret;
528 	vm_map_t        map;
529 
530 	map = current_map();
531 	kret = vm_map_wire_kernel(map,
532 	    vm_map_trunc_page(addr,
533 	    vm_map_page_mask(map)),
534 	    vm_map_round_page(addr + len,
535 	    vm_map_page_mask(map)),
536 	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
537 	    FALSE);
538 
539 	switch (kret) {
540 	case KERN_SUCCESS:
541 		return 0;
542 	case KERN_INVALID_ADDRESS:
543 	case KERN_NO_SPACE:
544 		return ENOMEM;
545 	case KERN_PROTECTION_FAILURE:
546 		return EACCES;
547 	default:
548 		return EINVAL;
549 	}
550 }
551 
/*
 * Unwire a user range previously wired with vslock().  "dirtied" is
 * currently ignored (the page-dirtying pass below is compiled out under
 * FIXME).  Returns a BSD errno translated from the Mach status.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	/* Compiled-out: would mark each page modified when the caller dirtied it. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* Translate the Mach return code into the errno callers expect. */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
602 
603 int
subyte(user_addr_t addr,int byte)604 subyte(
605 	user_addr_t addr,
606 	int byte)
607 {
608 	char character;
609 
610 	character = (char)byte;
611 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
612 }
613 
614 int
suibyte(user_addr_t addr,int byte)615 suibyte(
616 	user_addr_t addr,
617 	int byte)
618 {
619 	char character;
620 
621 	character = (char)byte;
622 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
623 }
624 
625 int
fubyte(user_addr_t addr)626 fubyte(user_addr_t addr)
627 {
628 	unsigned char byte;
629 
630 	if (copyin(addr, (void *) &byte, sizeof(char))) {
631 		return -1;
632 	}
633 	return byte;
634 }
635 
636 int
fuibyte(user_addr_t addr)637 fuibyte(user_addr_t addr)
638 {
639 	unsigned char byte;
640 
641 	if (copyin(addr, (void *) &(byte), sizeof(char))) {
642 		return -1;
643 	}
644 	return byte;
645 }
646 
647 int
suword(user_addr_t addr,long word)648 suword(
649 	user_addr_t addr,
650 	long word)
651 {
652 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
653 }
654 
655 long
fuword(user_addr_t addr)656 fuword(user_addr_t addr)
657 {
658 	long word = 0;
659 
660 	if (copyin(addr, (void *) &word, sizeof(int))) {
661 		return -1;
662 	}
663 	return word;
664 }
665 
666 /* suiword and fuiword are the same as suword and fuword, respectively */
667 
668 int
suiword(user_addr_t addr,long word)669 suiword(
670 	user_addr_t addr,
671 	long word)
672 {
673 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
674 }
675 
676 long
fuiword(user_addr_t addr)677 fuiword(user_addr_t addr)
678 {
679 	long word = 0;
680 
681 	if (copyin(addr, (void *) &word, sizeof(int))) {
682 		return -1;
683 	}
684 	return word;
685 }
686 
687 /*
688  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
689  * fetching and setting of process-sized size_t and pointer values.
690  */
691 int
sulong(user_addr_t addr,int64_t word)692 sulong(user_addr_t addr, int64_t word)
693 {
694 	if (IS_64BIT_PROCESS(current_proc())) {
695 		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
696 	} else {
697 		return suiword(addr, (long)word);
698 	}
699 }
700 
701 int64_t
fulong(user_addr_t addr)702 fulong(user_addr_t addr)
703 {
704 	int64_t longword;
705 
706 	if (IS_64BIT_PROCESS(current_proc())) {
707 		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
708 			return -1;
709 		}
710 		return longword;
711 	} else {
712 		return (int64_t)fuiword(addr);
713 	}
714 }
715 
716 int
suulong(user_addr_t addr,uint64_t uword)717 suulong(user_addr_t addr, uint64_t uword)
718 {
719 	if (IS_64BIT_PROCESS(current_proc())) {
720 		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
721 	} else {
722 		return suiword(addr, (uint32_t)uword);
723 	}
724 }
725 
726 uint64_t
fuulong(user_addr_t addr)727 fuulong(user_addr_t addr)
728 {
729 	uint64_t ulongword;
730 
731 	if (IS_64BIT_PROCESS(current_proc())) {
732 		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
733 			return -1ULL;
734 		}
735 		return ulongword;
736 	} else {
737 		return (uint64_t)fuiword(addr);
738 	}
739 }
740 
741 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)742 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
743 {
744 	return ENOTSUP;
745 }
746 
747 /*
748  * pid_for_task
749  *
750  * Find the BSD process ID for the Mach task associated with the given Mach port
751  * name
752  *
753  * Parameters:	args		User argument descriptor (see below)
754  *
755  * Indirect parameters:	args->t		Mach port name
756  *                      args->pid	Process ID (returned value; see below)
757  *
758  * Returns:	KERL_SUCCESS	Success
759  *              KERN_FAILURE	Not success
760  *
761  * Implicit returns: args->pid		Process ID
762  *
763  */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t        t = args->t;
	user_addr_t             pid_addr  = args->pid;
	proc_t p;
	task_t          t1;
	int     pid = -1;
	kern_return_t   err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Resolve the port name to a task reference (name flavor suffices). */
	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (task_is_a_corpse(t1)) {
			/* Corpses have no live proc; use the pid recorded in the task. */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* Always copy a pid out (-1 on failure); the copyout error is ignored. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
802 
803 /*
804  *
805  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
806  * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
807  *
808  */
809 static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
810 
811 /*
812  *	Routine:	task_for_pid_posix_check
813  *	Purpose:
814  *			Verify that the current process should be allowed to
815  *			get the target process's task port. This is only
816  *			permitted if:
817  *			- The current process is root
818  *			OR all of the following are true:
819  *			- The target process's real, effective, and saved uids
820  *			  are the same as the current proc's euid,
821  *			- The target process's group set is a subset of the
822  *			  calling process's group set, and
823  *			- The target process hasn't switched credentials.
824  *
825  *	Returns:	TRUE: permitted
826  *			FALSE: denied
827  */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	bool checkcredentials;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred)) {
		return TRUE;
	}

	/* We're allowed to get our own task port */
	if (target == current_proc()) {
		return TRUE;
	}

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Third-party debuggable drivers are exempt from the uid/gid checks below. */
	checkcredentials = !proc_is_third_party_debuggable_driver(target);

	if (checkcredentials) {
		/* Do target's ruid, euid, and saved uid match my euid? */
		if ((kauth_cred_getuid(targetcred) != myuid) ||
		    (kauth_cred_getruid(targetcred) != myuid) ||
		    (kauth_cred_getsvuid(targetcred) != myuid)) {
			allowed = FALSE;
			goto out;
		}
		/* Are target's groups a subset of my groups? */
		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
		    allowed == 0) {
			allowed = FALSE;
			goto out;
		}
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	/* Drop the credential reference taken above before returning. */
	kauth_cred_unref(&targetcred);
	return allowed;
}
893 
894 /*
895  *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
896  *
897  *	Description:	Waits for the user space daemon to respond to the request
898  *			we made. Function declared non inline to be visible in
899  *			stackshots and spindumps as well as debugging.
900  */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	/*
	 * Blocking upcall to the task-access daemon; kept noinline so the
	 * wait shows up under this symbol in stackshots and spindumps.
	 */
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
907 
908 /*
909  *	Routine:	task_for_pid
910  *	Purpose:
911  *		Get the task port for another "process", named by its
912  *		process ID on the same host as "target_task".
913  *
914  *		Only permitted to privileged processes, or processes
915  *		with the same user ID.
916  *
917  *		Note: if pid == 0, an error is return no matter who is calling.
918  *
919  * XXX This should be a BSD system call, not a Mach trap!!!
920  */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	void                    * sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/*
	 * Snapshot the proc identity now: the MACF check below runs after the
	 * proc ref has been dropped.
	 */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (proc_task(p) == TASK_NULL) {
		/* No task yet: report success with a MACH_PORT_NULL name. */
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held across upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (task_is_a_corpse(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Always copy out the resulting port name (MACH_PORT_NULL on failure). */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1068 
1069 /*
1070  *	Routine:	task_name_for_pid
1071  *	Purpose:
1072  *		Get the task name port for another "process", named by its
1073  *		process ID on the same host as "target_task".
1074  *
1075  *		Only permitted to privileged processes, or processes
1076  *		with the same user ID.
1077  *
1078  * XXX This should be a BSD system call, not a Mach trap!!!
1079  */
1080 
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Permitted when the target is not a zombie AND the caller is
		 * the target itself, the superuser, matches the target's
		 * effective and real uids, or holds the name-port entitlement.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
		    )) {
			if (proc_task(p) != TASK_NULL) {
				/* Snapshot identity before dropping the proc ref. */
				struct proc_ident pident = proc_ident(p);

				task_t task = proc_task(p);

				/* Hold a task ref; the proc ref must not span the MACF check. */
				task_reference(task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* convert_task_name_to_port() consumes the task ref. */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret  = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Failure path: report MACH_PORT_NULL to the caller. */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1167 
1168 /*
1169  *	Routine:	task_inspect_for_pid
1170  *	Purpose:
1171  *		Get the task inspect port for another "process", named by its
1172  *		process ID on the same host as "target_task".
1173  */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;

	proc_t                  proc = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_inspect_t          task_insp = TASK_INSPECT_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	int                     error = 0;
	void                    *sright = NULL;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Snapshot proc identity for the MACF check run after the proc ref drops. */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	task_insp = proc_task(proc);
	if (task_insp == TASK_INSPECT_NULL) {
		/* No task: fall through with error == 0 and a null port name. */
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	task_deallocate(t1);
	/* Always copy out the port name (MACH_PORT_NULL on failure). */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1289 
1290 /*
1291  *	Routine:	task_read_for_pid
1292  *	Purpose:
1293  *		Get the task read port for another "process", named by its
1294  *		process ID on the same host as "target_task".
1295  */
1296 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1297 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1298 {
1299 	mach_port_name_t        target_tport = args->target_tport;
1300 	int                     pid = args->pid;
1301 	user_addr_t             task_addr = args->t;
1302 
1303 	proc_t                  proc = PROC_NULL;
1304 	task_t                  t1 = TASK_NULL;
1305 	task_read_t             task_read = TASK_READ_NULL;
1306 	mach_port_name_t        tret = MACH_PORT_NULL;
1307 	ipc_port_t              tfpport = MACH_PORT_NULL;
1308 	int                     error = 0;
1309 	void                    *sright = NULL;
1310 	boolean_t               is_current_proc = FALSE;
1311 	struct proc_ident       pident = {0};
1312 
1313 	/* Disallow read port for kernel_task */
1314 	if (pid == 0) {
1315 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1316 		return EPERM;
1317 	}
1318 
1319 	t1 = port_name_to_task(target_tport);
1320 	if (t1 == TASK_NULL) {
1321 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1322 		return EINVAL;
1323 	}
1324 
1325 	proc = proc_find(pid);
1326 	if (proc == PROC_NULL) {
1327 		error = ESRCH;
1328 		goto trfpout;
1329 	}
1330 	pident = proc_ident(proc);
1331 	is_current_proc = (proc == current_proc());
1332 
1333 	if (!(task_for_pid_posix_check(proc))) {
1334 		error = EPERM;
1335 		goto trfpout;
1336 	}
1337 
1338 	task_read = proc_task(proc);
1339 	if (task_read == TASK_INSPECT_NULL) {
1340 		goto trfpout;
1341 	}
1342 
1343 	/*
1344 	 * Grab a task reference and drop the proc reference before making any upcalls.
1345 	 */
1346 	task_reference(task_read);
1347 
1348 	proc_rele(proc);
1349 	proc = PROC_NULL;
1350 
1351 #if CONFIG_MACF
1352 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1353 	if (error) {
1354 		error = EPERM;
1355 		goto trfpout;
1356 	}
1357 #endif
1358 
1359 	/* If we aren't root and target's task access port is set... */
1360 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1361 	    !is_current_proc &&
1362 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
1363 	    (tfpport != IPC_PORT_NULL)) {
1364 		if (tfpport == IPC_PORT_DEAD) {
1365 			error = EACCES;
1366 			goto trfpout;
1367 		}
1368 
1369 
1370 		/* Call up to the task access server */
1371 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1372 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1373 
1374 		if (error != MACH_MSG_SUCCESS) {
1375 			if (error == MACH_RCV_INTERRUPTED) {
1376 				error = EINTR;
1377 			} else {
1378 				error = EPERM;
1379 			}
1380 			goto trfpout;
1381 		}
1382 	}
1383 
1384 	/* Check if the task has been corpsified */
1385 	if (task_is_a_corpse(task_read)) {
1386 		error = EACCES;
1387 		goto trfpout;
1388 	}
1389 
1390 	/* could be IP_NULL, consumes a ref */
1391 	sright = (void*) convert_task_read_to_port(task_read);
1392 	task_read = TASK_READ_NULL;
1393 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1394 
1395 trfpout:
1396 	task_deallocate(t1);
1397 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1398 	if (proc != PROC_NULL) {
1399 		proc_rele(proc);
1400 	}
1401 	if (tfpport != IPC_PORT_NULL) {
1402 		ipc_port_release_send(tfpport);
1403 	}
1404 	if (task_read != TASK_READ_NULL) {
1405 		task_deallocate(task_read);
1406 	}
1407 
1408 	*ret = error;
1409 	return error;
1410 }
1411 
/*
 *	Routine:	pid_suspend
 *	Purpose:
 *		Suspend the process named by args->pid via task_pidsuspend().
 *		Returns errno-style values through *ret and as the return value.
 *		NOTE(review): declared kern_return_t but returns errno codes —
 *		matches the sibling pid_resume(); callers appear to treat it as int.
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never allow suspending pid 0. */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* The suspend/resume entitlement may substitute for the posix check. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Tell the memorystatus subsystem the process is now suspended. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1503 
/*
 *	Routine:	debug_control_port_for_pid
 *	Purpose:
 *		Return (via copyout to args->t) a send right to the debug
 *		control port of the task named by args->pid.  Holders of
 *		DEBUG_PORT_ENTITLEMENT skip the MACF and task-access-server
 *		checks entirely.
 */
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	ipc_port_t              sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Snapshot proc identity for the MACF check run after the proc ref drops. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (proc_task(p) == TASK_NULL) {
		/* No task yet: report success with a MACH_PORT_NULL name. */
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	/* Entitled callers bypass both the MACF and taskgated checks. */
	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Always copy out the resulting port name (MACH_PORT_NULL on failure). */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1638 
/*
 *	Routine:	pid_resume
 *	Purpose:
 *		Resume the process named by args->pid via task_pidresume().
 *		Errno-style values are returned through *ret and as the return
 *		value (declared kern_return_t to mirror pid_suspend).
 */
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never allow resuming pid 0. */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* The suspend/resume entitlement may substitute for the posix check. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* On embedded platforms, un-defunct the process's sockets on resume. */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				/* Could not page the task back in: kill it and report EIO. */
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1742 
1743 #if !XNU_TARGET_OS_OSX
1744 /*
1745  * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1746  * When a process is specified, this call is blocking, otherwise we wake up the
1747  * freezer thread and do not block on a process being frozen.
1748  */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int     error = 0;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	//Note that targetproc may be null (negative pids skip proc_find above)
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * Magic pids: -2 compresses anonymous pages system-wide, -1 signals
	 * memorystatus inactivity; a real pid is frozen synchronously.
	 */
	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1805 #endif /* !XNU_TARGET_OS_OSX */
1806 
1807 #if SOCKETS
1808 int
networking_memstatus_callout(proc_t p,uint32_t status)1809 networking_memstatus_callout(proc_t p, uint32_t status)
1810 {
1811 	struct fileproc *fp;
1812 
1813 	/*
1814 	 * proc list lock NOT held
1815 	 * proc lock NOT held
1816 	 * a reference on the proc has been held / shall be dropped by the caller.
1817 	 */
1818 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1819 	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1820 
1821 	proc_fdlock(p);
1822 
1823 	fdt_foreach(fp, p) {
1824 		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1825 #if NECP
1826 		case DTYPE_NETPOLICY:
1827 			necp_fd_memstatus(p, status,
1828 			    (struct necp_fd_data *)fp_get_data(fp));
1829 			break;
1830 #endif /* NECP */
1831 #if SKYWALK
1832 		case DTYPE_CHANNEL:
1833 			kern_channel_memstatus(p, status,
1834 			    (struct kern_channel *)fp_get_data(fp));
1835 			break;
1836 #endif /* SKYWALK */
1837 		default:
1838 			break;
1839 		}
1840 	}
1841 	proc_fdunlock(p);
1842 
1843 	return 1;
1844 }
1845 
1846 #if SKYWALK
1847 /*
1848  * Since we make multiple passes across the fileproc array, record the
1849  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases.  If we find more,
1851  * we'll go to the slow path during second pass.
1852  */
1853 #define MAX_CHANNELS    8       /* should be more than enough */
1854 #endif /* SKYWALK */
1855 
/*
 * proc_iterate() callback for pid_shutdown_sockets(): defuncts the sockets,
 * NECP fds, and (Skywalk) channels associated with args->pid in process p.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	/* First-pass cache of channel pointers; overflow forces a second scan. */
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match by owner pid, last-user pid, or delegated pid. */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP).  Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1940 
1941 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1942 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1943 {
1944 	int                             error = 0;
1945 	proc_t                          targetproc = PROC_NULL;
1946 	int                             pid = args->pid;
1947 	int                             level = args->level;
1948 
1949 	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1950 	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1951 		error = EINVAL;
1952 		goto out;
1953 	}
1954 
1955 	targetproc = proc_find(pid);
1956 	if (targetproc == PROC_NULL) {
1957 		error = ESRCH;
1958 		goto out;
1959 	}
1960 
1961 	if (!task_for_pid_posix_check(targetproc) &&
1962 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1963 		error = EPERM;
1964 		goto out;
1965 	}
1966 
1967 #if CONFIG_MACF
1968 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1969 	if (error) {
1970 		error = EPERM;
1971 		goto out;
1972 	}
1973 #endif
1974 
1975 	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1976 	    networking_defunct_callout, args, NULL, NULL);
1977 
1978 out:
1979 	if (targetproc != PROC_NULL) {
1980 		proc_rele(targetproc);
1981 	}
1982 	*ret = error;
1983 	return error;
1984 }
1985 
1986 #endif /* SOCKETS */
1987 
1988 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1989 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1990     __unused int arg2, struct sysctl_req *req)
1991 {
1992 	int error = 0;
1993 	int new_value;
1994 
1995 	error = SYSCTL_OUT(req, arg1, sizeof(int));
1996 	if (error || req->newptr == USER_ADDR_NULL) {
1997 		return error;
1998 	}
1999 
2000 	if (!kauth_cred_issuser(kauth_cred_get())) {
2001 		return EPERM;
2002 	}
2003 
2004 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2005 		goto out;
2006 	}
2007 	if ((new_value == KERN_TFP_POLICY_DENY)
2008 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2009 		tfp_policy = new_value;
2010 	} else {
2011 		error = EINVAL;
2012 	}
2013 out:
2014 	return error;
2015 }
2016 
/* kern.secure_kernel: read-only flag, 1 iff built with SECURE_KERNEL. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy, writable by superuser only (see handler). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: shared-region tracing, version, and persistence knobs. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2035 
2036 /*
2037  * shared_region_check_np:
2038  *
2039  * This system call is intended for dyld.
2040  *
2041  * dyld calls this when any process starts to see if the process's shared
2042  * region is already set up and ready to use.
2043  * This call returns the base address of the first mapping in the
2044  * process's shared region's first mapping.
2045  * dyld will then check what's mapped at that address.
2046  *
2047  * If the shared region is empty, dyld will then attempt to map the shared
2048  * cache file in the shared region via the shared_region_map_np() system call.
2049  *
2050  * If something's already mapped in the shared region, dyld will check if it
2051  * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2053  * shared region.
2054  * If it doesn't match, dyld will unmap the shared region and map the shared
2055  * cache into the process's address space via mmap().
2056  *
2057  * A NULL pointer argument can be used by dyld to indicate it has unmapped
2058  * the shared region. We will remove the shared_region reference from the task.
2059  *
2060  * ERROR VALUES
2061  * EINVAL	no shared region
2062  * ENOMEM	shared region is empty
2063  * EFAULT	bad address for "start_address"
2064  */
int
shared_region_check_np(
	__unused struct proc                    *p,
	struct shared_region_check_np_args      *uap,
	__unused int                            *retvalp)
{
	vm_shared_region_t      shared_region;
	mach_vm_offset_t        start_address = 0;
	int                     error = 0;
	kern_return_t           kr;
	task_t                  task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process.  The sentinel is truncated to 32 bits
	 * for 32-bit address spaces so dyld can pass the same constant either way.
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/*
	 * Retrieve the current task's shared region.  This takes a reference
	 * that is dropped via vm_shared_region_deallocate() below.
	 */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					/* uap->start_address doubles as the user buffer to fill in */
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		/* drop the reference taken by vm_shared_region_get() */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2163 
2164 
2165 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2166 shared_region_copyin(
2167 	struct proc  *p,
2168 	user_addr_t  user_addr,
2169 	unsigned int count,
2170 	unsigned int element_size,
2171 	void         *kernel_data)
2172 {
2173 	int             error = 0;
2174 	vm_size_t       size = count * element_size;
2175 
2176 	error = copyin(user_addr, kernel_data, size);
2177 	if (error) {
2178 		SHARED_REGION_TRACE_ERROR(
2179 			("shared_region: %p [%d(%s)] map(): "
2180 			"copyin(0x%llx, %ld) failed (error=%d)\n",
2181 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2182 			proc_getpid(p), p->p_comm,
2183 			(uint64_t)user_addr, (long)size, error));
2184 	}
2185 	return error;
2186 }
2187 
/*
 * A reasonable upper limit to prevent overflow of allocation/copyin.
 */
#define _SR_FILE_MAPPINGS_MAX_FILES 256

/*
 * Forward declaration.  noinline keeps this helper's frame off
 * _shared_region_map_and_slide()'s stack (see comment above that function).
 */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
	struct proc              *p,
	uint32_t                 files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region  *shared_region);
2200 
2201 /*
2202  * Setup part of _shared_region_map_and_slide().
2203  * It had to be broken out of _shared_region_map_and_slide() to
2204  * prevent compiler inlining from blowing out the stack.
2205  */
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings,
	struct _sr_file_mappings            **sr_file_mappings,
	struct vm_shared_region             **shared_region_ptr,
	struct vnode                        *rdir_vp)
{
	int                             error = 0;
	struct _sr_file_mappings        *srfmp;
	uint32_t                        mappings_next;
	struct vnode_attr               va;
	off_t                           fs;
#if CONFIG_MACF
	vm_prot_t                       maxprot = VM_PROT_ALL;
#endif
	uint32_t                        i;
	struct vm_shared_region         *shared_region = NULL;
	boolean_t                       is_driverkit = task_is_driver(current_task());

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	/* bound and validate the file table before allocating */
	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
		error = E2BIG;
		goto done;
	}
	if (files_count == 0) {
		error = EINVAL;
		goto done;
	}
	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
	    Z_WAITOK | Z_ZERO);
	if (*sr_file_mappings == NULL) {
		error = ENOMEM;
		goto done;
	}
	/*
	 * Carve each file's slice out of the shared "mappings" array and
	 * reject if the per-file counts overrun the total.
	 * NOTE(review): "mappings_next +=" could wrap for hostile
	 * sf_mappings_count values; shared_region_map_and_slide_2_np()
	 * validates per-file counts before calling — confirm all callers do.
	 */
	mappings_next = 0;
	for (i = 0; i < files_count; i++) {
		srfmp = &(*sr_file_mappings)[i];
		srfmp->fd = files[i].sf_fd;
		srfmp->mappings_count = files[i].sf_mappings_count;
		srfmp->mappings = &mappings[mappings_next];
		mappings_next += srfmp->mappings_count;
		if (mappings_next > mappings_count) {
			error = EINVAL;
			goto done;
		}
		srfmp->slide = files[i].sf_slide;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_trim_and_get(current_task());
	*shared_region_ptr = shared_region;
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"no shared region\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		error = EINVAL;
		goto done;
	}

	/*
	 * Check the shared region matches the current root
	 * directory of this process.  Deny the mapping to
	 * avoid tainting the shared region with something that
	 * doesn't quite belong into it.
	 */
	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: map(%p) root_dir mismatch\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread())));
		error = EPERM;
		goto done;
	}

	/* validate each file and its mappings */
	for (srfmp = &(*sr_file_mappings)[0];
	    srfmp < &(*sr_file_mappings)[files_count];
	    srfmp++) {
		if (srfmp->mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * A file descriptor of -1 is used to indicate that the data
		 * to be put in the shared region for this mapping comes directly
		 * from the processes address space. Ensure we have proper alignments.
		 */
		if (srfmp->fd == -1) {
			/* only allow one mapping per fd */
			if (srfmp->mappings_count > 1) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data >1 mapping\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				error = EINVAL;
				goto done;
			}

			/*
			 * The destination address and size must be page aligned.
			 */
			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
			mach_vm_address_t dest_addr = mapping->sms_address;
			mach_vm_size_t    map_size = mapping->sms_size;
			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, dest_addr));
				error = EINVAL;
				goto done;
			}
			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, map_size));
				error = EINVAL;
				goto done;
			}
			continue;
		}

		/* get file structure from file descriptor */
		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d lookup failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}

		/* we need at least read permission on the file */
		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d not readable\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd));
			error = EPERM;
			goto done;
		}

		/* get vnode from file structure */
		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d getwithref failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}
		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);

		/* make sure the vnode is a regular file */
		if (srfmp->vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not a file (type=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, srfmp->vp->v_type));
			error = EINVAL;
			goto done;
		}

#if CONFIG_MACF
		/* pass in 0 for the offset argument because AMFI does not need the offset
		 *       of the shared cache */
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
		if (error) {
			goto done;
		}
#endif /* MAC */

#if XNU_TARGET_OS_OSX && defined(__arm64__)
		/*
		 * Check if the shared cache is in the trust cache;
		 * if so, we can skip the root ownership check.
		 */
#if DEVELOPMENT || DEBUG
		/*
		 * Skip both root ownership and trust cache check if
		 * enforcement is disabled.
		 */
		if (!cs_system_enforcement()) {
			goto after_root_check;
		}
#endif /* DEVELOPMENT || DEBUG */
		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
		if (blob == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing CS blob\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		const uint8_t *cdhash = csblob_get_cdhash(blob);
		if (cdhash == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing cdhash\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}

		/* accept only OS/static/engineering trust-cache entries */
		bool in_trust_cache = false;
		TrustCacheQueryToken_t qt;
		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
			TCType_t tc_type = kTCTypeInvalid;
			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
			    (tc_type == kTCTypeCryptex1BootOS ||
			    tc_type == kTCTypeStatic ||
			    tc_type == kTCTypeEngineering));
		}
		if (!in_trust_cache) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not in trust cache\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

		/* The shared cache file must be owned by root */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_getattr(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				error));
			goto done;
		}
		if (va.va_uid != 0) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"owned by uid=%d instead of 0\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, va.va_uid));
			error = EPERM;
			goto done;
		}

#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_flags);
			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"vnode_getattr(%p) failed (error=%d)\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					error));
				goto done;
			}

			if (!(va.va_flags & SF_RESTRICTED)) {
				/*
				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
				 * the shared cache file is NOT SIP-protected, so reject the
				 * mapping request
				 */
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'), "
					"vnode is not SIP-protected. \n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				goto done;
			}
		}
#else /* CONFIG_CSR */

		/*
		 * Devices without SIP/ROSP need to make sure that the shared cache
		 * is either on the root volume or in the preboot cryptex volume.
		 */
		assert(rdir_vp != NULL);
		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
			vnode_t preboot_vp = NULL;
#if XNU_TARGET_OS_OSX
#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
#else
#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
#endif
			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"not on process' root volume nor preboot volume\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				if (preboot_vp) {
					(void)vnode_put(preboot_vp);
				}
				goto done;
			} else if (preboot_vp) {
				(void)vnode_put(preboot_vp);
			}
		}
#endif /* CONFIG_CSR */

		if (scdir_enforce) {
			/*
			 * The file's parent directory must be one of the expected
			 * shared-cache directories for this kind of task.
			 * NOTE(review): the declaration's initializer is redundant —
			 * the for loop re-initializes expected_scdir_path.
			 */
			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			struct vnode *scdir_vp = NULL;
			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			    *expected_scdir_path != NULL;
			    expected_scdir_path++) {
				/* get vnode for expected_scdir_path */
				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
				if (error) {
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)]: "
						"vnode_lookup(%s) failed (error=%d)\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						*expected_scdir_path, error));
					continue;
				}

				/* check if parent is scdir_vp */
				assert(scdir_vp != NULL);
				if (vnode_parent(srfmp->vp) == scdir_vp) {
					(void)vnode_put(scdir_vp);
					scdir_vp = NULL;
					goto scdir_ok;
				}
				(void)vnode_put(scdir_vp);
				scdir_vp = NULL;
			}
			/* nothing matches */
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"shared cache file not in expected directory\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EPERM;
			goto done;
		}
scdir_ok:

		/* get vnode size */
		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_size(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
			goto done;
		}
		srfmp->file_size = fs;

		/* get the file's memory object handle */
		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"no memory object\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EINVAL;
			goto done;
		}

		/* check that the mappings are properly covered by code signatures */
		if (!cs_system_enforcement()) {
			/* code signing is not enforced: no need to check */
		} else {
			for (i = 0; i < srfmp->mappings_count; i++) {
				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
					/* zero-filled mapping: not backed by the file */
					continue;
				}
				if (ubc_cs_is_range_codesigned(srfmp->vp,
				    srfmp->mappings[i].sms_file_offset,
				    srfmp->mappings[i].sms_size)) {
					/* this mapping is fully covered by code signatures */
					continue;
				}
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
					"is not code-signed\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					i, srfmp->mappings_count,
					srfmp->mappings[i].sms_address,
					srfmp->mappings[i].sms_size,
					srfmp->mappings[i].sms_file_offset,
					srfmp->mappings[i].sms_max_prot,
					srfmp->mappings[i].sms_init_prot));
				error = EINVAL;
				goto done;
			}
		}
	}
done:
	/* on failure, release everything acquired so far; the caller sees NULL outputs */
	if (error != 0) {
		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
		*sr_file_mappings = NULL;
		*shared_region_ptr = NULL;
	}
	return error;
}
2672 
2673 /*
2674  * shared_region_map_np()
2675  *
2676  * This system call is intended for dyld.
2677  *
2678  * dyld uses this to map a shared cache file into a shared region.
2679  * This is usually done only the first time a shared cache is needed.
2680  * Subsequent processes will just use the populated shared region without
2681  * requiring any further setup.
2682  */
static int
_shared_region_map_and_slide(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int                             error = 0;
	kern_return_t                   kr = KERN_SUCCESS;
	struct _sr_file_mappings        *sr_file_mappings = NULL;
	struct vnode                    *rdir_vp = NULL;
	struct vm_shared_region         *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		/* setup already ran the cleanup path on failure */
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	/* drop the root-dir reference and release files/vnodes/shared region */
	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
2768 
2769 /*
2770  * Clean up part of _shared_region_map_and_slide()
2771  * It had to be broken out of _shared_region_map_and_slide() to
2772  * prevent compiler inlining from blowing out the stack.
2773  */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc              *p,
	uint32_t                 files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region  *shared_region)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr        va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				/* mark the vnode as belonging to the shared cache */
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated
				 * for it. That namedstream vnode will also
				 * have to be marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (shared_region != NULL) {
		/* drop the reference taken by vm_shared_region_trim_and_get() */
		vm_shared_region_deallocate(shared_region);
	}
}
2838 
2839 
/*
 * For each file mapped, we may have mappings for:
 *    TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
 * so let's round up to 8 mappings per file.
 */
#define SFM_MAX       (_SR_FILE_MAPPINGS_MAX_FILES * 8)     /* max mapping structs allowed to pass in */

/*
 * This is the new interface for setting up shared region mappings.
 *
 * The slide used for shared regions setup using this interface is done differently
 * from the old interface. The slide value passed in the shared_files_np represents
 * a max value. The kernel will choose a random value based on that, then use it
 * for all shared regions.
 */
/* Mask that keeps the chosen slide aligned: 4K pages on x86_64, 16K elsewhere. */
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
#endif
2860 
/*
 * shared_region_map_and_slide_2_np() system call.
 *
 * Maps the files/mappings that make up a dyld shared cache into the
 * calling process' shared region, applying one randomly chosen,
 * page-aligned ASLR slide to all of them.
 *
 * Returns KERN_SUCCESS (0) on success — including the trivial
 * "no files" / "no mappings" cases — or a kern_return_t error code.
 */
int
shared_region_map_and_slide_2_np(
	struct proc                                  *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int                                 *retvalp)
{
	unsigned int                  files_count;
	struct shared_file_np         *shared_files = NULL;
	unsigned int                  mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t                 kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/*
	 * Validate the user-supplied counts and allocate kernel buffers
	 * for the file descriptors.  The counts are bounded before the
	 * size computations, so the multiplications cannot overflow.
	 */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	/* Same validation/allocation for the mapping descriptors. */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	/* Copy the descriptor arrays in from user space. */
	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * Pick the ASLR slide: a page-aligned random value strictly below
	 * the maximum slide requested in the first file descriptor.
	 * sf_slide == 0 requests no sliding.
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	/* boot-arg override to disable ASLR on dev/debug kernels */
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0; /* running index into mappings[] across all files */
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			/* the per-file mapping counts must stay within mappings[] */
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* early-exit paths can reach here with either buffer still NULL */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2984 
2985 /*
2986  * A syscall for dyld to use to map data pages that need load time relocation fixups.
2987  * The fixups are performed by a custom pager during page-in, so the pages still appear
2988  * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
2989  * on demand later, all w/o using the compressor.
2990  *
2991  * Note these page are treated as MAP_PRIVATE. So if the application dirties any pages while
2992  * running, they are COW'd as normal.
2993  */
2994 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)2995 map_with_linking_np(
2996 	struct proc                     *p,
2997 	struct map_with_linking_np_args *uap,
2998 	__unused int                    *retvalp)
2999 {
3000 	uint32_t                        region_count;
3001 	uint32_t                        r;
3002 	struct mwl_region               *regions = NULL;
3003 	struct mwl_region               *rp;
3004 	uint32_t                        link_info_size;
3005 	void                            *link_info = NULL;      /* starts with a struct mwl_info_hdr */
3006 	struct mwl_info_hdr             *info_hdr = NULL;
3007 	uint64_t                        binds_size;
3008 	int                             fd;
3009 	struct fileproc                 *fp = NULL;
3010 	struct vnode                    *vp = NULL;
3011 	size_t                          file_size;
3012 	off_t                           fs;
3013 	struct vnode_attr               va;
3014 	memory_object_control_t         file_control = NULL;
3015 	int                             error;
3016 	kern_return_t                   kr = KERN_SUCCESS;
3017 
3018 	/*
3019 	 * Check if dyld has told us it finished with this call.
3020 	 */
3021 	if (p->p_disallow_map_with_linking) {
3022 		printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3023 		    __func__, proc_getpid(p), p->p_comm);
3024 		kr = KERN_FAILURE;
3025 		goto done;
3026 	}
3027 
3028 	/*
3029 	 * First we do some sanity checking on what dyld has passed us.
3030 	 */
3031 	region_count = uap->region_count;
3032 	link_info_size = uap->link_info_size;
3033 	if (region_count == 0) {
3034 		printf("%s: [%d(%s)]: region_count == 0\n",
3035 		    __func__, proc_getpid(p), p->p_comm);
3036 		kr = KERN_FAILURE;
3037 		goto done;
3038 	}
3039 	if (region_count > MWL_MAX_REGION_COUNT) {
3040 		printf("%s: [%d(%s)]: region_count too big %d\n",
3041 		    __func__, proc_getpid(p), p->p_comm, region_count);
3042 		kr = KERN_FAILURE;
3043 		goto done;
3044 	}
3045 
3046 	if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3047 		printf("%s: [%d(%s)]: link_info_size too small\n",
3048 		    __func__, proc_getpid(p), p->p_comm);
3049 		kr = KERN_FAILURE;
3050 		goto done;
3051 	}
3052 	if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3053 		printf("%s: [%d(%s)]: link_info_size too big %d\n",
3054 		    __func__, proc_getpid(p), p->p_comm, link_info_size);
3055 		kr = KERN_FAILURE;
3056 		goto done;
3057 	}
3058 
3059 	/*
3060 	 * Allocate and copyin the regions and link info
3061 	 */
3062 	regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3063 	if (regions == NULL) {
3064 		printf("%s: [%d(%s)]: failed to allocate regions\n",
3065 		    __func__, proc_getpid(p), p->p_comm);
3066 		kr = KERN_RESOURCE_SHORTAGE;
3067 		goto done;
3068 	}
3069 	kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3070 	if (kr != KERN_SUCCESS) {
3071 		printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3072 		    __func__, proc_getpid(p), p->p_comm, kr);
3073 		goto done;
3074 	}
3075 
3076 	link_info = kalloc_data(link_info_size, Z_WAITOK);
3077 	if (link_info == NULL) {
3078 		printf("%s: [%d(%s)]: failed to allocate link_info\n",
3079 		    __func__, proc_getpid(p), p->p_comm);
3080 		kr = KERN_RESOURCE_SHORTAGE;
3081 		goto done;
3082 	}
3083 	kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3084 	if (kr != KERN_SUCCESS) {
3085 		printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3086 		    __func__, proc_getpid(p), p->p_comm, kr);
3087 		goto done;
3088 	}
3089 
3090 	/*
3091 	 * Do some verification the data structures.
3092 	 */
3093 	info_hdr = (struct mwl_info_hdr *)link_info;
3094 	if (info_hdr->mwli_version != MWL_INFO_VERS) {
3095 		printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3096 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3097 		kr = KERN_FAILURE;
3098 		goto done;
3099 	}
3100 
3101 	if (info_hdr->mwli_binds_offset > link_info_size) {
3102 		printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3103 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3104 		kr = KERN_FAILURE;
3105 		goto done;
3106 	}
3107 
3108 	/* some older devs have s/w page size > h/w page size, no need to support them */
3109 	if (info_hdr->mwli_page_size != PAGE_SIZE) {
3110 		/* no printf, since this is expected on some devices */
3111 		kr = KERN_INVALID_ARGUMENT;
3112 		goto done;
3113 	}
3114 
3115 	binds_size = (uint64_t)info_hdr->mwli_binds_count *
3116 	    ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3117 	if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3118 		printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3119 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3120 		kr = KERN_FAILURE;
3121 		goto done;
3122 	}
3123 
3124 	if (info_hdr->mwli_chains_offset > link_info_size) {
3125 		printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3126 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3127 		kr = KERN_FAILURE;
3128 		goto done;
3129 	}
3130 
3131 
3132 	/*
3133 	 * Ensure the chained starts in the link info and make sure the
3134 	 * segment info offsets are within bounds.
3135 	 */
3136 	if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3137 		printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3138 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3139 		kr = KERN_FAILURE;
3140 		goto done;
3141 	}
3142 	if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3143 		printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3144 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3145 		kr = KERN_FAILURE;
3146 		goto done;
3147 	}
3148 
3149 	/* Note that more verification of offsets is done in the pager itself */
3150 
3151 	/*
3152 	 * Ensure we've only been given one FD and verify valid protections.
3153 	 */
3154 	fd = regions[0].mwlr_fd;
3155 	for (r = 0; r < region_count; ++r) {
3156 		if (regions[r].mwlr_fd != fd) {
3157 			printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3158 			    __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3159 			kr = KERN_FAILURE;
3160 			goto done;
3161 		}
3162 		regions[r].mwlr_protections &= VM_PROT_ALL;
3163 		if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3164 			printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3165 			    __func__, proc_getpid(p), p->p_comm);
3166 			kr = KERN_FAILURE;
3167 			goto done;
3168 		}
3169 	}
3170 
3171 
3172 	/* get file structure from file descriptor */
3173 	error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3174 	if (error) {
3175 		printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3176 		    __func__, proc_getpid(p), p->p_comm, error);
3177 		kr = KERN_FAILURE;
3178 		goto done;
3179 	}
3180 
3181 	/* We need at least read permission on the file */
3182 	if (!(fp->fp_glob->fg_flag & FREAD)) {
3183 		printf("%s: [%d(%s)]: not readable\n",
3184 		    __func__, proc_getpid(p), p->p_comm);
3185 		kr = KERN_FAILURE;
3186 		goto done;
3187 	}
3188 
3189 	/* Get the vnode from file structure */
3190 	vp = (struct vnode *)fp_get_data(fp);
3191 	error = vnode_getwithref(vp);
3192 	if (error) {
3193 		printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3194 		    __func__, proc_getpid(p), p->p_comm, error);
3195 		kr = KERN_FAILURE;
3196 		vp = NULL; /* just to be sure */
3197 		goto done;
3198 	}
3199 
3200 	/* Make sure the vnode is a regular file */
3201 	if (vp->v_type != VREG) {
3202 		printf("%s: [%d(%s)]: vnode not VREG\n",
3203 		    __func__, proc_getpid(p), p->p_comm);
3204 		kr = KERN_FAILURE;
3205 		goto done;
3206 	}
3207 
3208 	/* get vnode size */
3209 	error = vnode_size(vp, &fs, vfs_context_current());
3210 	if (error) {
3211 		goto done;
3212 	}
3213 	file_size = fs;
3214 
3215 	/* get the file's memory object handle */
3216 	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3217 	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3218 		printf("%s: [%d(%s)]: no memory object\n",
3219 		    __func__, proc_getpid(p), p->p_comm);
3220 		kr = KERN_FAILURE;
3221 		goto done;
3222 	}
3223 
3224 	for (r = 0; r < region_count; ++r) {
3225 		rp = &regions[r];
3226 
3227 		/*
3228 		 * Only allow data mappings and not zero fill.
3229 		 */
3230 		if (rp->mwlr_protections & VM_PROT_ZF) {
3231 			printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF\n",
3232 			    __func__, proc_getpid(p), p->p_comm, r);
3233 			kr = KERN_FAILURE;
3234 			goto done;
3235 		}
3236 		if (rp->mwlr_protections & VM_PROT_EXECUTE) {
3237 			printf("%s: [%d(%s)]: region %d, found VM_PROT_EXECUTE\n",
3238 			    __func__, proc_getpid(p), p->p_comm, r);
3239 			kr = KERN_FAILURE;
3240 			goto done;
3241 		}
3242 
3243 #if CONFIG_MACF
3244 		vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3245 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3246 		    fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3247 		if (error) {
3248 			printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3249 			    __func__, proc_getpid(p), p->p_comm, r, error);
3250 			kr = KERN_FAILURE;
3251 			goto done;
3252 		}
3253 #endif /* MAC */
3254 
3255 		/* check that the mappings are properly covered by code signatures */
3256 		if (cs_system_enforcement()) {
3257 			if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3258 				printf("%s: [%d(%s)]: region %d, not code signed\n",
3259 				    __func__, proc_getpid(p), p->p_comm, r);
3260 				kr = KERN_FAILURE;
3261 				goto done;
3262 			}
3263 		}
3264 	}
3265 
3266 	/* update the vnode's access time */
3267 	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3268 		VATTR_INIT(&va);
3269 		nanotime(&va.va_access_time);
3270 		VATTR_SET_ACTIVE(&va, va_access_time);
3271 		vnode_setattr(vp, &va, vfs_context_current());
3272 	}
3273 
3274 	/* get the VM to do the work */
3275 	kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3276 
3277 done:
3278 	if (fp != NULL) {
3279 		/* release the file descriptor */
3280 		fp_drop(p, fd, fp, 0);
3281 	}
3282 	if (vp != NULL) {
3283 		(void)vnode_put(vp);
3284 	}
3285 	if (regions != NULL) {
3286 		kfree_data(regions, region_count * sizeof(regions[0]));
3287 	}
3288 	/* link info is used in the pager if things worked */
3289 	if (link_info != NULL && kr != KERN_SUCCESS) {
3290 		kfree_data(link_info, link_info_size);
3291 	}
3292 
3293 	switch (kr) {
3294 	case KERN_SUCCESS:
3295 		return 0;
3296 	case KERN_RESOURCE_SHORTAGE:
3297 		return ENOMEM;
3298 	default:
3299 		return EINVAL;
3300 	}
3301 }
3302 
/* Read-only counters for the dyld pager (debug/development kernels only). */
#if DEBUG || DEVELOPMENT
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 *       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 *       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int     vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3324 
3325 static int
3326 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3327 {
3328 #pragma unused(oidp, arg1, arg2)
3329 	unsigned int page_free_wanted;
3330 
3331 	page_free_wanted = mach_vm_ctl_page_free_wanted();
3332 	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3333 }
/* vm.page_free_wanted is computed on demand by the handler above. */
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int     vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int     vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");
3349 
/* Pageout/cleaned-queue debug counters, exported on dev/debug kernels only. */
#if DEVELOPMENT || DEBUG
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");         /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, "");         /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */
3384 
/* Debug knobs forcing (possibly intermittent) zero-fill on madvise(MADV_FREE*). */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");

/* Reusable-pages statistics, all read-only views of vm_page_stats_reusable. */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3424 
3425 
/* Global page-queue sizes. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
/* writable: toggles pageout protection for realtime threads' pages */
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3461 
/* Free-page clump allocator statistics (x86_64 only). */
#if defined (__x86_64__)
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif  /* if DEVELOPMENT || DEBUG */
#endif  /* #if defined (__x86_64__) */
3490 
/* Secluded-memory queue sizes and grab statistics. */
#if CONFIG_SECLUDED_MEMORY

SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3517 
3518 #include <kern/thread.h>
3519 #include <sys/user.h>
3520 
3521 void vm_pageout_io_throttle(void);
3522 
3523 void
vm_pageout_io_throttle(void)3524 vm_pageout_io_throttle(void)
3525 {
3526 	struct uthread *uthread = current_uthread();
3527 
3528 	/*
3529 	 * thread is marked as a low priority I/O type
3530 	 * and the I/O we issued while in this cleaning operation
3531 	 * collided with normal I/O operations... we'll
3532 	 * delay in order to mitigate the impact of this
3533 	 * task on the normal operation of the system
3534 	 */
3535 
3536 	if (uthread->uu_lowpri_window) {
3537 		throttle_lowpri_io(1);
3538 	}
3539 }
3540 
3541 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3542 vm_pressure_monitor(
3543 	__unused struct proc *p,
3544 	struct vm_pressure_monitor_args *uap,
3545 	int *retval)
3546 {
3547 	kern_return_t   kr;
3548 	uint32_t        pages_reclaimed;
3549 	uint32_t        pages_wanted;
3550 
3551 	kr = mach_vm_pressure_monitor(
3552 		(boolean_t) uap->wait_for_pressure,
3553 		uap->nsecs_monitored,
3554 		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3555 		&pages_wanted);
3556 
3557 	switch (kr) {
3558 	case KERN_SUCCESS:
3559 		break;
3560 	case KERN_ABORTED:
3561 		return EINTR;
3562 	default:
3563 		return EINVAL;
3564 	}
3565 
3566 	if (uap->pages_reclaimed) {
3567 		if (copyout((void *)&pages_reclaimed,
3568 		    uap->pages_reclaimed,
3569 		    sizeof(pages_reclaimed)) != 0) {
3570 			return EFAULT;
3571 		}
3572 	}
3573 
3574 	*retval = (int) pages_wanted;
3575 	return 0;
3576 }
3577 
3578 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3579 kas_info(struct proc *p,
3580     struct kas_info_args *uap,
3581     int *retval __unused)
3582 {
3583 #ifndef CONFIG_KAS_INFO
3584 	(void)p;
3585 	(void)uap;
3586 	return ENOTSUP;
3587 #else /* CONFIG_KAS_INFO */
3588 	int                     selector = uap->selector;
3589 	user_addr_t     valuep = uap->value;
3590 	user_addr_t     sizep = uap->size;
3591 	user_size_t size, rsize;
3592 	int                     error;
3593 
3594 	if (!kauth_cred_issuser(kauth_cred_get())) {
3595 		return EPERM;
3596 	}
3597 
3598 #if CONFIG_MACF
3599 	error = mac_system_check_kas_info(kauth_cred_get(), selector);
3600 	if (error) {
3601 		return error;
3602 	}
3603 #endif
3604 
3605 	if (IS_64BIT_PROCESS(p)) {
3606 		user64_size_t size64;
3607 		error = copyin(sizep, &size64, sizeof(size64));
3608 		size = (user_size_t)size64;
3609 	} else {
3610 		user32_size_t size32;
3611 		error = copyin(sizep, &size32, sizeof(size32));
3612 		size = (user_size_t)size32;
3613 	}
3614 	if (error) {
3615 		return error;
3616 	}
3617 
3618 	switch (selector) {
3619 	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3620 	{
3621 		uint64_t slide = vm_kernel_slide;
3622 
3623 		if (sizeof(slide) != size) {
3624 			return EINVAL;
3625 		}
3626 
3627 		error = copyout(&slide, valuep, sizeof(slide));
3628 		if (error) {
3629 			return error;
3630 		}
3631 		rsize = size;
3632 	}
3633 	break;
3634 	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3635 	{
3636 		uint32_t i;
3637 		kernel_mach_header_t *mh = &_mh_execute_header;
3638 		struct load_command *cmd;
3639 		cmd = (struct load_command*) &mh[1];
3640 		uint64_t *bases;
3641 		rsize = mh->ncmds * sizeof(uint64_t);
3642 
3643 		/*
3644 		 * Return the size if no data was passed
3645 		 */
3646 		if (valuep == 0) {
3647 			break;
3648 		}
3649 
3650 		if (rsize > size) {
3651 			return EINVAL;
3652 		}
3653 
3654 		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3655 
3656 		for (i = 0; i < mh->ncmds; i++) {
3657 			if (cmd->cmd == LC_SEGMENT_KERNEL) {
3658 				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3659 				bases[i] = (uint64_t)sg->vmaddr;
3660 			}
3661 			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3662 		}
3663 
3664 		error = copyout(bases, valuep, rsize);
3665 
3666 		kfree_data(bases, rsize);
3667 
3668 		if (error) {
3669 			return error;
3670 		}
3671 	}
3672 	break;
3673 	default:
3674 		return EINVAL;
3675 	}
3676 
3677 	if (IS_64BIT_PROCESS(p)) {
3678 		user64_size_t size64 = (user64_size_t)rsize;
3679 		error = copyout(&size64, sizep, sizeof(size64));
3680 	} else {
3681 		user32_size_t size32 = (user32_size_t)rsize;
3682 		error = copyout(&size32, sizep, sizeof(size32));
3683 	}
3684 
3685 	return error;
3686 #endif /* CONFIG_KAS_INFO */
3687 }
3688 
3689 #if __has_feature(ptrauth_calls)
3690 /*
3691  * Generate a random pointer signing key that isn't 0.
3692  */
3693 uint64_t
generate_jop_key(void)3694 generate_jop_key(void)
3695 {
3696 	uint64_t key;
3697 
3698 	do {
3699 		read_random(&key, sizeof key);
3700 	} while (key == 0);
3701 	return key;
3702 }
3703 #endif /* __has_feature(ptrauth_calls) */
3704 
3705 
3706 #pragma clang diagnostic push
3707 #pragma clang diagnostic ignored "-Wcast-qual"
3708 #pragma clang diagnostic ignored "-Wunused-function"
3709 
/*
 * Compile-time checks only: the two kernel address bounds are exported
 * below via SYSCTL_ULONG through an (unsigned long *) cast, so verify
 * that they really have the width of an unsigned long.
 */
static void
asserts()
{
	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}
3716 
/* Kernel VA bounds, exported read-only (width checked by asserts() above). */
SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

/* Read-only VM page counters exported under the "vm" sysctl namespace. */
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");

#if __arm64__ && (DEVELOPMENT || DEBUG)
/* Tunable gating whether footprint suspends are honored (see handler below). */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3738 static int
3739 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3740 {
3741 #pragma unused(oidp, arg1, arg2)
3742 	int error = 0;
3743 	int new_value;
3744 
3745 	if (req->newptr == USER_ADDR_NULL) {
3746 		return 0;
3747 	}
3748 	error = SYSCTL_IN(req, &new_value, sizeof(int));
3749 	if (error) {
3750 		return error;
3751 	}
3752 	if (!vm_footprint_suspend_allowed) {
3753 		if (new_value != 0) {
3754 			/* suspends are not allowed... */
3755 			return 0;
3756 		}
3757 		/* ... but let resumes proceed */
3758 	}
3759 	DTRACE_VM2(footprint_suspend,
3760 	    vm_map_t, current_map(),
3761 	    int, new_value);
3762 
3763 	pmap_footprint_suspend(current_map(), new_value);
3764 
3765 	return 0;
3766 }
/* Write-only, unprivileged (ANYBODY), value not returned on read (MASKED). */
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3771 
/* Read-only corpse-footprint statistics. */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");

#if CODE_SIGNING_MONITOR
/* Read-only code-signing-monitor deferral counters. */
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */

/* Read-only shared-region pager counters plus one RW destroy-delay tunable. */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
/* Debug-only (MACH_ASSERT) pmap ledger panic leeway tunable. */
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
3817 
3818 
/*
 * Read-only statistics for the copy strategies taken by
 * vm_map_lookup_and_lock_object (slowly / strategically / shadow).
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3858 
/* RW policy knob plus read-only copy-on-read counter. */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Read-only shared-region counts (current and peak). */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* Pointer-authentication builds only. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* Debug-only (MACH_ASSERT) 4K-debugging knobs. */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3902 
/* Read-only counters for map size/data limit events. */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/* Read-only resilient-media fault path counters. */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/* Debug-only (MACH_ASSERT) error-injection rates (RW) and hit counters (RD). */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3941 
3942 /*
3943  * A sysctl which causes all existing shared regions to become stale. They
3944  * will no longer be used by anything new and will be torn down as soon as
3945  * the last existing user exits. A write of non-zero value causes that to happen.
3946  * This should only be used by launchd, so we check that this is initproc.
3947  */
3948 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3949 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3950 {
3951 	unsigned int value = 0;
3952 	int changed = 0;
3953 	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3954 	if (error || !changed) {
3955 		return error;
3956 	}
3957 	if (current_proc() != initproc) {
3958 		return EPERM;
3959 	}
3960 
3961 	vm_shared_region_pivot();
3962 
3963 	return 0;
3964 }
3965 
3966 SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
3967     CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
3968     0, 0, shared_region_pivot, "I", "");
3969 
3970 extern uint64_t vm_object_shadow_forced;
3971 extern uint64_t vm_object_shadow_skipped;
3972 SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
3973     &vm_object_shadow_forced, "");
3974 SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
3975     &vm_object_shadow_skipped, "");
3976 
3977 SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
3978     &vmtc_total, 0, "total text page corruptions detected");
3979 
3980 
#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self healing.  DEBUG/DEVELOPMENT kernels only.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3987 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3988 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3989 {
3990 	uint64_t value = 0;
3991 	int error = sysctl_handle_quad(oidp, &value, 0, req);
3992 	if (error || !req->newptr) {
3993 		return error;
3994 	}
3995 
3996 	if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3997 		return 0;
3998 	} else {
3999 		return EINVAL;
4000 	}
4001 }
4002 
/* Write-only, value not returned on read (MASKED). */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
4007 
4008 #if DEBUG || DEVELOPMENT
4009 #if CONFIG_MAP_RANGES
4010 static int
4011 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4012 {
4013 #pragma unused(arg1, arg2, oidp)
4014 	struct mach_vm_range range;
4015 
4016 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4017 	    != KERN_SUCCESS) {
4018 		return EINVAL;
4019 	}
4020 
4021 	return SYSCTL_OUT(req, &range, sizeof(range));
4022 }
4023 
4024 static int
4025 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4026 {
4027 #pragma unused(arg1, arg2, oidp)
4028 	struct mach_vm_range range;
4029 
4030 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4031 	    != KERN_SUCCESS) {
4032 		return EINVAL;
4033 	}
4034 
4035 	return SYSCTL_OUT(req, &range, sizeof(range));
4036 }
4037 
4038 /*
4039  * A sysctl that can be used to return ranges for the current VM map.
4040  * Used for testing VM ranges.
4041  */
/* Read-only struct exports; handlers defined above. */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");

#endif /* CONFIG_MAP_RANGES */
#endif /* DEBUG || DEVELOPMENT */
4049 
4050 #if DEBUG || DEVELOPMENT
4051 #endif /* DEBUG || DEVELOPMENT */
4052 
/* Read-only compressor segment fill contention statistics. */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/* Major-compaction tunables (RW) and statistics (RD); macOS arm64 only. */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
4075 
#if DEVELOPMENT || DEBUG
/* RW, unprivileged (ANYBODY) debug knob; DEVELOPMENT/DEBUG kernels only. */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* RW, unprivileged (ANYBODY) debug knob; MACH_ASSERT kernels only. */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4085