xref: /xnu-10002.61.3/bsd/vm/vm_unix.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Mach Operating System
30  * Copyright (c) 1987 Carnegie-Mellon University
31  * All rights reserved.  The CMU software License Agreement specifies
32  * the terms and conditions for use and redistribution.
33  */
34 /*
35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36  * support for mandatory and extensible security protections.  This notice
37  * is included in support of clause 2.2 (b) of the Apple Public License,
38  * Version 2.0.
39  */
40 #include <vm/vm_options.h>
41 
42 #include <kern/ecc.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/extmod_statistics.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/sdt.h>
50 #include <mach/task.h>
51 #include <mach/task_access.h>
52 #include <mach/task_special_ports.h>
53 #include <mach/time_value.h>
54 #include <mach/vm_map.h>
55 #include <mach/vm_param.h>
56 #include <mach/vm_prot.h>
57 #include <machine/machine_routines.h>
58 
59 #include <sys/file_internal.h>
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/dir.h>
63 #include <sys/namei.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/vm.h>
67 #include <sys/file.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/mount.h>
70 #include <sys/xattr.h>
71 #include <sys/trace.h>
72 #include <sys/kernel.h>
73 #include <sys/ubc_internal.h>
74 #include <sys/user.h>
75 #include <sys/syslog.h>
76 #include <sys/stat.h>
77 #include <sys/sysproto.h>
78 #include <sys/mman.h>
79 #include <sys/sysctl.h>
80 #include <sys/cprotect.h>
81 #include <sys/kpi_socket.h>
82 #include <sys/kas_info.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/random.h>
86 #include <sys/code_signing.h>
87 #if NECP
88 #include <net/necp.h>
89 #endif /* NECP */
90 #if SKYWALK
91 #include <skywalk/os_channel.h>
92 #endif /* SKYWALK */
93 
94 #include <security/audit/audit.h>
95 #include <security/mac.h>
96 #include <bsm/audit_kevents.h>
97 
98 #include <kern/kalloc.h>
99 #include <vm/vm_map.h>
100 #include <vm/vm_kern.h>
101 #include <vm/vm_pageout.h>
102 
103 #include <mach/shared_region.h>
104 #include <vm/vm_shared_region.h>
105 
106 #include <vm/vm_dyld_pager.h>
107 
108 #include <vm/vm_protos.h>
109 
110 #include <sys/kern_memorystatus.h>
111 #include <sys/kern_memorystatus_freeze.h>
112 #include <sys/proc_internal.h>
113 
114 #include <mach-o/fixup-chains.h>
115 
116 #if CONFIG_MACF
117 #include <security/mac_framework.h>
118 #endif
119 
120 #include <kern/bits.h>
121 
122 #if CONFIG_CSR
123 #include <sys/csr.h>
124 #endif /* CONFIG_CSR */
125 #include <sys/trust_caches.h>
126 #include <libkern/amfi/amfi.h>
127 #include <IOKit/IOBSD.h>
128 
129 #if VM_MAP_DEBUG_APPLE_PROTECT
130 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
131 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
132 
133 #if VM_MAP_DEBUG_FOURK
134 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
135 #endif /* VM_MAP_DEBUG_FOURK */
136 
137 #if DEVELOPMENT || DEBUG
138 
139 static int
140 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
141 {
142 #pragma unused(arg1, arg2)
143 	vm_offset_t     kaddr;
144 	kern_return_t   kr;
145 	int     error = 0;
146 	int     size = 0;
147 
148 	error = sysctl_handle_int(oidp, &size, 0, req);
149 	if (error || !req->newptr) {
150 		return error;
151 	}
152 
153 	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
154 	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
155 
156 	if (kr == KERN_SUCCESS) {
157 		kmem_free(kernel_map, kaddr, size);
158 	}
159 
160 	return error;
161 }
162 
163 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
164     0, 0, &sysctl_kmem_alloc_contig, "I", "");
165 
166 extern int vm_region_footprint;
167 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
168 
169 static int
170 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
171 {
172 #pragma unused(arg1, arg2, oidp)
173 	kmem_gobj_stats stats = kmem_get_gobj_stats();
174 
175 	return SYSCTL_OUT(req, &stats, sizeof(stats));
176 }
177 
178 SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
179     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
180     0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
181 
182 #endif /* DEVELOPMENT || DEBUG */
183 
184 static int
185 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
186 {
187 #pragma unused(arg1, arg2, oidp)
188 	int     error = 0;
189 	int     value;
190 
191 	value = task_self_region_footprint();
192 	error = SYSCTL_OUT(req, &value, sizeof(int));
193 	if (error) {
194 		return error;
195 	}
196 
197 	if (!req->newptr) {
198 		return 0;
199 	}
200 
201 	error = SYSCTL_IN(req, &value, sizeof(int));
202 	if (error) {
203 		return error;
204 	}
205 	task_self_region_footprint_set(value);
206 	return 0;
207 }
208 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
209 
210 static int
211 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
212 {
213 #pragma unused(arg1, arg2, oidp)
214 	int     error = 0;
215 	int     value;
216 
217 	value = (1 << thread_self_region_page_shift());
218 	error = SYSCTL_OUT(req, &value, sizeof(int));
219 	if (error) {
220 		return error;
221 	}
222 
223 	if (!req->newptr) {
224 		return 0;
225 	}
226 
227 	error = SYSCTL_IN(req, &value, sizeof(int));
228 	if (error) {
229 		return error;
230 	}
231 
232 	if (value != 0 && value != 4096 && value != 16384) {
233 		return EINVAL;
234 	}
235 
236 #if !__ARM_MIXED_PAGE_SIZE__
237 	if (value != vm_map_page_size(current_map())) {
238 		return EINVAL;
239 	}
240 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
241 
242 	thread_self_region_page_shift_set(bit_first(value));
243 	return 0;
244 }
245 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
246 
247 
248 #if DEVELOPMENT || DEBUG
249 extern int panic_on_unsigned_execute;
250 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
251 
252 extern int vm_log_xnu_user_debug;
253 SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
254 #endif /* DEVELOPMENT || DEBUG */
255 
256 extern int cs_executable_create_upl;
257 extern int cs_executable_wire;
258 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
259 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
260 
261 extern int apple_protect_pager_count;
262 extern int apple_protect_pager_count_mapped;
263 extern unsigned int apple_protect_pager_cache_limit;
264 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
265 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
266 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
267 
268 #if DEVELOPMENT || DEBUG
269 extern int radar_20146450;
270 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
271 
272 extern int macho_printf;
273 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
274 
275 extern int apple_protect_pager_data_request_debug;
276 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
277 
278 #if __arm64__
279 /* These are meant to support the page table accounting unit test. */
280 extern unsigned int arm_hardware_page_size;
281 extern unsigned int arm_pt_desc_size;
282 extern unsigned int arm_pt_root_size;
283 extern unsigned int inuse_user_tteroot_count;
284 extern unsigned int inuse_kernel_tteroot_count;
285 extern unsigned int inuse_user_ttepages_count;
286 extern unsigned int inuse_kernel_ttepages_count;
287 extern unsigned int inuse_user_ptepages_count;
288 extern unsigned int inuse_kernel_ptepages_count;
289 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
290 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
291 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
292 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
293 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
294 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
295 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
296 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
297 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
298 extern unsigned int free_page_size_tt_count;
299 extern unsigned int free_two_page_size_tt_count;
300 extern unsigned int free_tt_count;
301 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
302 SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
303 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
304 #if DEVELOPMENT || DEBUG
305 extern unsigned long pmap_asid_flushes;
306 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
307 extern unsigned long pmap_asid_hits;
308 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
309 extern unsigned long pmap_asid_misses;
310 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
311 #endif
312 #endif /* __arm64__ */
313 
314 #if __arm64__
315 extern int fourk_pager_data_request_debug;
316 SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
317 #endif /* __arm64__ */
318 #endif /* DEVELOPMENT || DEBUG */
319 
320 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
321 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
322 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
323 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
324 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
325 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
326 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
327 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
328 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
329 #if VM_SCAN_FOR_SHADOW_CHAIN
330 static int vm_shadow_max_enabled = 0;    /* Disabled by default */
331 extern int proc_shadow_max(void);
332 static int
333 vm_shadow_max SYSCTL_HANDLER_ARGS
334 {
335 #pragma unused(arg1, arg2, oidp)
336 	int value = 0;
337 
338 	if (vm_shadow_max_enabled) {
339 		value = proc_shadow_max();
340 	}
341 
342 	return SYSCTL_OUT(req, &value, sizeof(value));
343 }
344 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
345     0, 0, &vm_shadow_max, "I", "");
346 
347 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
348 
349 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
350 
351 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
352 
353 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
354 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
355 /*
356  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
357  */
358 
359 #if DEVELOPMENT || DEBUG
360 extern int allow_stack_exec, allow_data_exec;
361 
362 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
363 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
364 
365 #endif /* DEVELOPMENT || DEBUG */
366 
/*
 * Human-readable names for a vm_prot_t, indexed by (prot & VM_PROT_ALL),
 * i.e. the read/write/execute permission bits.  Used by
 * log_stack_execution_failure() below.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
377 
378 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)379 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
380 {
381 	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
382 	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
383 }
384 
385 /*
386  * shared_region_unnest_logging: level of logging of unnesting events
387  * 0	- no logging
388  * 1	- throttled logging of unexpected unnesting events (default)
389  * 2	- unthrottled logging of unexpected unnesting events
390  * 3+	- unthrottled logging of all unnesting events
391  */
392 int shared_region_unnest_logging = 1;
393 
394 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
395     &shared_region_unnest_logging, 0, "");
396 
/* Throttle window (seconds) and max messages per window for unexpected
 * shared-region unnest logging; consumed by log_unnest_badness(). */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
399 
400 
#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

/* NULL-terminated list of directories the dyld shared cache may be mapped
 * from; checked when scdir_enforce is set (vm.enforce_shared_cache_dir). */
static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
/* Embedded-platform equivalents of the shared-cache directories above. */
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Same idea, for the DriverKit variant of the shared cache. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
438 
439 #ifndef SECURE_KERNEL
440 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
441 {
442 #if CONFIG_CSR
443 	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
444 		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
445 		return EPERM;
446 	}
447 #endif /* CONFIG_CSR */
448 	return sysctl_handle_int(oidp, arg1, arg2, req);
449 }
450 
451 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
452 #endif
453 
454 /* These log rate throttling state variables aren't thread safe, but
455  * are sufficient unto the task.
456  */
457 static int64_t last_unnest_log_time = 0;
458 static int shared_region_unnest_log_count = 0;
459 
/*
 * log_unnest_badness:
 * Record (via DTrace and the system log) that map "m" had the range
 * [s, e) unnested from the DYLD shared region.  Normal for debuggers,
 * but it increases memory footprint, so unexpected unnests are logged.
 * Verbosity and throttling are controlled by shared_region_unnest_logging
 * (see the comment at its definition above).
 */
void
log_unnest_badness(
	vm_map_t        m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t       is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval  tv;

	if (shared_region_unnest_logging == 0) {
		/* Logging disabled entirely. */
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	if (shared_region_unnest_logging <= 1) {
		/*
		 * Throttled mode: allow at most
		 * shared_region_unnest_log_count_threshold messages per
		 * vm_shared_region_unnest_log_interval seconds.
		 * (Throttle state is intentionally not thread safe; see
		 * the comment at its definition.)
		 */
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			/* New throttle window: reset the counter. */
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
504 
505 uint64_t
vm_purge_filebacked_pagers(void)506 vm_purge_filebacked_pagers(void)
507 {
508 	uint64_t pages_purged;
509 
510 	pages_purged = 0;
511 	pages_purged += apple_protect_pager_purge_all();
512 	pages_purged += shared_region_pager_purge_all();
513 	pages_purged += dyld_pager_purge_all();
514 #if DEVELOPMENT || DEBUG
515 	printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
516 #endif /* DEVELOPMENT || DEBUG */
517 	return pages_purged;
518 }
519 
520 int
useracc(user_addr_t addr,user_size_t len,int prot)521 useracc(
522 	user_addr_t     addr,
523 	user_size_t     len,
524 	int     prot)
525 {
526 	vm_map_t        map;
527 
528 	map = current_map();
529 	return vm_map_check_protection(
530 		map,
531 		vm_map_trunc_page(addr,
532 		vm_map_page_mask(map)),
533 		vm_map_round_page(addr + len,
534 		vm_map_page_mask(map)),
535 		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
536 }
537 
538 int
vslock(user_addr_t addr,user_size_t len)539 vslock(
540 	user_addr_t     addr,
541 	user_size_t     len)
542 {
543 	kern_return_t   kret;
544 	vm_map_t        map;
545 
546 	map = current_map();
547 	kret = vm_map_wire_kernel(map,
548 	    vm_map_trunc_page(addr,
549 	    vm_map_page_mask(map)),
550 	    vm_map_round_page(addr + len,
551 	    vm_map_page_mask(map)),
552 	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
553 	    FALSE);
554 
555 	switch (kret) {
556 	case KERN_SUCCESS:
557 		return 0;
558 	case KERN_INVALID_ADDRESS:
559 	case KERN_NO_SPACE:
560 		return ENOMEM;
561 	case KERN_PROTECTION_FAILURE:
562 		return EACCES;
563 	default:
564 		return EINVAL;
565 	}
566 }
567 
/*
 * vsunlock: unwire a user range previously wired by vslock().
 * "dirtied" is currently unused — the code that would mark pages
 * modified is compiled out below.  Returns a BSD errno (0 on success).
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	/* Compiled-out: would walk each page and mark it modified. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* Translate the Mach status into a BSD errno. */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
618 
619 int
subyte(user_addr_t addr,int byte)620 subyte(
621 	user_addr_t addr,
622 	int byte)
623 {
624 	char character;
625 
626 	character = (char)byte;
627 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
628 }
629 
630 int
suibyte(user_addr_t addr,int byte)631 suibyte(
632 	user_addr_t addr,
633 	int byte)
634 {
635 	char character;
636 
637 	character = (char)byte;
638 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
639 }
640 
641 int
fubyte(user_addr_t addr)642 fubyte(user_addr_t addr)
643 {
644 	unsigned char byte;
645 
646 	if (copyin(addr, (void *) &byte, sizeof(char))) {
647 		return -1;
648 	}
649 	return byte;
650 }
651 
652 int
fuibyte(user_addr_t addr)653 fuibyte(user_addr_t addr)
654 {
655 	unsigned char byte;
656 
657 	if (copyin(addr, (void *) &(byte), sizeof(char))) {
658 		return -1;
659 	}
660 	return byte;
661 }
662 
663 int
suword(user_addr_t addr,long word)664 suword(
665 	user_addr_t addr,
666 	long word)
667 {
668 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
669 }
670 
671 long
fuword(user_addr_t addr)672 fuword(user_addr_t addr)
673 {
674 	long word = 0;
675 
676 	if (copyin(addr, (void *) &word, sizeof(int))) {
677 		return -1;
678 	}
679 	return word;
680 }
681 
682 /* suiword and fuiword are the same as suword and fuword, respectively */
683 
684 int
suiword(user_addr_t addr,long word)685 suiword(
686 	user_addr_t addr,
687 	long word)
688 {
689 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
690 }
691 
692 long
fuiword(user_addr_t addr)693 fuiword(user_addr_t addr)
694 {
695 	long word = 0;
696 
697 	if (copyin(addr, (void *) &word, sizeof(int))) {
698 		return -1;
699 	}
700 	return word;
701 }
702 
703 /*
704  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
705  * fetching and setting of process-sized size_t and pointer values.
706  */
707 int
sulong(user_addr_t addr,int64_t word)708 sulong(user_addr_t addr, int64_t word)
709 {
710 	if (IS_64BIT_PROCESS(current_proc())) {
711 		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
712 	} else {
713 		return suiword(addr, (long)word);
714 	}
715 }
716 
717 int64_t
fulong(user_addr_t addr)718 fulong(user_addr_t addr)
719 {
720 	int64_t longword;
721 
722 	if (IS_64BIT_PROCESS(current_proc())) {
723 		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
724 			return -1;
725 		}
726 		return longword;
727 	} else {
728 		return (int64_t)fuiword(addr);
729 	}
730 }
731 
732 int
suulong(user_addr_t addr,uint64_t uword)733 suulong(user_addr_t addr, uint64_t uword)
734 {
735 	if (IS_64BIT_PROCESS(current_proc())) {
736 		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
737 	} else {
738 		return suiword(addr, (uint32_t)uword);
739 	}
740 }
741 
742 uint64_t
fuulong(user_addr_t addr)743 fuulong(user_addr_t addr)
744 {
745 	uint64_t ulongword;
746 
747 	if (IS_64BIT_PROCESS(current_proc())) {
748 		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
749 			return -1ULL;
750 		}
751 		return ulongword;
752 	} else {
753 		return (uint64_t)fuiword(addr);
754 	}
755 }
756 
757 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)758 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
759 {
760 	return ENOTSUP;
761 }
762 
763 /*
764  * pid_for_task
765  *
766  * Find the BSD process ID for the Mach task associated with the given Mach port
767  * name
768  *
769  * Parameters:	args		User argument descriptor (see below)
770  *
771  * Indirect parameters:	args->t		Mach port name
772  *                      args->pid	Process ID (returned value; see below)
773  *
774  * Returns:	KERN_SUCCESS	Success
775  *              KERN_FAILURE	Not success
776  *
777  * Implicit returns: args->pid		Process ID
778  *
779  */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t        t = args->t;
	user_addr_t             pid_addr  = args->pid;
	proc_t p;
	task_t          t1;
	int     pid = -1;       /* copied out even on failure */
	kern_return_t   err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Resolve the port name to a task reference. */
	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (task_is_a_corpse(t1)) {
			/* Corpses have no proc; use the pid recorded in the task. */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* Best-effort copy-out of the pid (-1 on failure); errors ignored. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
818 
819 /*
820  *
821  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
822  * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
823  *
824  */
825 static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
826 
827 /*
828  *	Routine:	task_for_pid_posix_check
829  *	Purpose:
830  *			Verify that the current process should be allowed to
831  *			get the target process's task port. This is only
832  *			permitted if:
833  *			- The current process is root
834  *			OR all of the following are true:
835  *			- The target process's real, effective, and saved uids
836  *			  are the same as the current proc's euid,
837  *			- The target process's group set is a subset of the
838  *			  calling process's group set, and
839  *			- The target process hasn't switched credentials.
840  *
841  *	Returns:	TRUE: permitted
842  *			FALSE: denied
843  */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	bool checkcredentials;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred)) {
		return TRUE;
	}

	/* We're allowed to get our own task port */
	if (target == current_proc()) {
		return TRUE;
	}

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Third-party debuggable drivers are exempt from the uid/gid checks
	 * below (the P_SUGID check still applies). */
	checkcredentials = !proc_is_third_party_debuggable_driver(target);

	if (checkcredentials) {
		/* Do target's ruid, euid, and saved uid match my euid? */
		if ((kauth_cred_getuid(targetcred) != myuid) ||
		    (kauth_cred_getruid(targetcred) != myuid) ||
		    (kauth_cred_getsvuid(targetcred) != myuid)) {
			allowed = FALSE;
			goto out;
		}
		/* Are target's groups a subset of my groups? */
		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
		    allowed == 0) {
			allowed = FALSE;
			goto out;
		}
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	/* Drop the credential reference taken above. */
	kauth_cred_unref(&targetcred);
	return allowed;
}
909 
910 /*
911  *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
912  *
913  *	Description:	Waits for the user space daemon to respond to the request
914  *			we made. Function declared non inline to be visible in
915  *			stackshots and spindumps as well as debugging.
916  */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	/* Thin blocking wrapper; noinline keeps this frame visible in
	 * stackshots/spindumps while waiting on the task access daemon. */
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
923 
924 /*
925  *	Routine:	task_for_pid
926  *	Purpose:
927  *		Get the task port for another "process", named by its
928  *		process ID on the same host as "target_task".
929  *
930  *		Only permitted to privileged processes, or processes
931  *		with the same user ID.
932  *
933  *		Note: if pid == 0, an error is returned no matter who is calling.
934  *
935  * XXX This should be a BSD system call, not a Mach trap!!!
936  */
937 kern_return_t
task_for_pid(struct task_for_pid_args * args)938 task_for_pid(
939 	struct task_for_pid_args *args)
940 {
941 	mach_port_name_t        target_tport = args->target_tport;
942 	int                     pid = args->pid;
943 	user_addr_t             task_addr = args->t;
944 	proc_t                  p = PROC_NULL;
945 	task_t                  t1 = TASK_NULL;
946 	task_t                  task = TASK_NULL;
947 	mach_port_name_t        tret = MACH_PORT_NULL;
948 	ipc_port_t              tfpport = MACH_PORT_NULL;
949 	void                    * sright = NULL;
950 	int                     error = 0;
951 	boolean_t               is_current_proc = FALSE;
952 	struct proc_ident       pident = {0};
953 
954 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
955 	AUDIT_ARG(pid, pid);
956 	AUDIT_ARG(mach_port1, target_tport);
957 
958 	/* Always check if pid == 0 */
959 	if (pid == 0) {
960 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
961 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
962 		return KERN_FAILURE;
963 	}
964 
965 	t1 = port_name_to_task(target_tport);
966 	if (t1 == TASK_NULL) {
967 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
968 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
969 		return KERN_FAILURE;
970 	}
971 
972 
973 	p = proc_find(pid);
974 	if (p == PROC_NULL) {
975 		error = KERN_FAILURE;
976 		goto tfpout;
977 	}
978 	pident = proc_ident(p);
979 	is_current_proc = (p == current_proc());
980 
981 #if CONFIG_AUDIT
982 	AUDIT_ARG(process, p);
983 #endif
984 
985 	if (!(task_for_pid_posix_check(p))) {
986 		error = KERN_FAILURE;
987 		goto tfpout;
988 	}
989 
990 	if (proc_task(p) == TASK_NULL) {
991 		error = KERN_SUCCESS;
992 		goto tfpout;
993 	}
994 
995 	/*
996 	 * Grab a task reference and drop the proc reference as the proc ref
997 	 * shouldn't be held accross upcalls.
998 	 */
999 	task = proc_task(p);
1000 	task_reference(task);
1001 
1002 	proc_rele(p);
1003 	p = PROC_NULL;
1004 
1005 #if CONFIG_MACF
1006 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1007 	if (error) {
1008 		error = KERN_FAILURE;
1009 		goto tfpout;
1010 	}
1011 #endif
1012 
1013 	/* If we aren't root and target's task access port is set... */
1014 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1015 	    !is_current_proc &&
1016 	    (task_get_task_access_port(task, &tfpport) == 0) &&
1017 	    (tfpport != IPC_PORT_NULL)) {
1018 		if (tfpport == IPC_PORT_DEAD) {
1019 			error = KERN_PROTECTION_FAILURE;
1020 			goto tfpout;
1021 		}
1022 
1023 		/* Call up to the task access server */
1024 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1025 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1026 
1027 		if (error != MACH_MSG_SUCCESS) {
1028 			if (error == MACH_RCV_INTERRUPTED) {
1029 				error = KERN_ABORTED;
1030 			} else {
1031 				error = KERN_FAILURE;
1032 			}
1033 			goto tfpout;
1034 		}
1035 	}
1036 
1037 	/* Grant task port access */
1038 	extmod_statistics_incr_task_for_pid(task);
1039 
1040 	/* this reference will be consumed during conversion */
1041 	task_reference(task);
1042 	if (task == current_task()) {
1043 		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
1044 		sright = (void *)convert_task_to_port_pinned(task);
1045 	} else {
1046 		sright = (void *)convert_task_to_port(task);
1047 	}
1048 	/* extra task ref consumed */
1049 
1050 	/*
1051 	 * Check if the task has been corpsified. We must do so after conversion
1052 	 * since we don't hold locks and may have grabbed a corpse control port
1053 	 * above which will prevent no-senders notification delivery.
1054 	 */
1055 	if (task_is_a_corpse(task)) {
1056 		ipc_port_release_send(sright);
1057 		error = KERN_FAILURE;
1058 		goto tfpout;
1059 	}
1060 
1061 	tret = ipc_port_copyout_send(
1062 		sright,
1063 		get_task_ipcspace(current_task()));
1064 
1065 	error = KERN_SUCCESS;
1066 
1067 tfpout:
1068 	task_deallocate(t1);
1069 	AUDIT_ARG(mach_port2, tret);
1070 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1071 
1072 	if (tfpport != IPC_PORT_NULL) {
1073 		ipc_port_release_send(tfpport);
1074 	}
1075 	if (task != TASK_NULL) {
1076 		task_deallocate(task);
1077 	}
1078 	if (p != PROC_NULL) {
1079 		proc_rele(p);
1080 	}
1081 	AUDIT_MACH_SYSCALL_EXIT(error);
1082 	return error;
1083 }
1084 
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* The caller must present a valid task port for itself */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		/* Hold the target's credential steady for the uid comparisons */
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Allowed when the target is not a zombie and the caller is
		 * the target itself, the superuser, matches both the target's
		 * effective and real uids, or holds the "name port safe"
		 * entitlement.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
		    )) {
			if (proc_task(p) != TASK_NULL) {
				/* Stable identity for the MACF check after proc_rele() */
				struct proc_ident pident = proc_ident(p);

				task_t task = proc_task(p);

				/* Hold the task across the proc ref drop */
				task_reference(task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* Consumes the task reference taken above */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret  = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Denied (or no such process): report MACH_PORT_NULL to the caller */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1183 
/*
 *	Routine:	task_inspect_for_pid
 *	Purpose:
 *		Get the task inspect port for another "process", named by its
 *		process ID on the same host as "target_task".
 *	Returns:
 *		0 on success, or a BSD errno (EPERM/EINVAL/ESRCH/EACCES/EINTR)
 *		on failure; the port name (MACH_PORT_NULL on failure) is
 *		copied out to args->t in either case.
 */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;

	proc_t                  proc = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_inspect_t          task_insp = TASK_INSPECT_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	int                     error = 0;
	void                    *sright = NULL;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	/* The caller must present a valid task port for itself */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Stable identity for the MACF check after the proc ref is dropped */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	/* Basic POSIX uid/gid based permission check */
	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	task_insp = proc_task(proc);
	if (task_insp == TASK_INSPECT_NULL) {
		/* No task: succeed (error == 0) with tret == MACH_PORT_NULL */
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	task_deallocate(t1);
	/* Always copy out the port name (MACH_PORT_NULL on failure) */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1305 
1306 /*
1307  *	Routine:	task_read_for_pid
1308  *	Purpose:
1309  *		Get the task read port for another "process", named by its
1310  *		process ID on the same host as "target_task".
1311  */
1312 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1313 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1314 {
1315 	mach_port_name_t        target_tport = args->target_tport;
1316 	int                     pid = args->pid;
1317 	user_addr_t             task_addr = args->t;
1318 
1319 	proc_t                  proc = PROC_NULL;
1320 	task_t                  t1 = TASK_NULL;
1321 	task_read_t             task_read = TASK_READ_NULL;
1322 	mach_port_name_t        tret = MACH_PORT_NULL;
1323 	ipc_port_t              tfpport = MACH_PORT_NULL;
1324 	int                     error = 0;
1325 	void                    *sright = NULL;
1326 	boolean_t               is_current_proc = FALSE;
1327 	struct proc_ident       pident = {0};
1328 
1329 	/* Disallow read port for kernel_task */
1330 	if (pid == 0) {
1331 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1332 		return EPERM;
1333 	}
1334 
1335 	t1 = port_name_to_task(target_tport);
1336 	if (t1 == TASK_NULL) {
1337 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1338 		return EINVAL;
1339 	}
1340 
1341 	proc = proc_find(pid);
1342 	if (proc == PROC_NULL) {
1343 		error = ESRCH;
1344 		goto trfpout;
1345 	}
1346 	pident = proc_ident(proc);
1347 	is_current_proc = (proc == current_proc());
1348 
1349 	if (!(task_for_pid_posix_check(proc))) {
1350 		error = EPERM;
1351 		goto trfpout;
1352 	}
1353 
1354 	task_read = proc_task(proc);
1355 	if (task_read == TASK_INSPECT_NULL) {
1356 		goto trfpout;
1357 	}
1358 
1359 	/*
1360 	 * Grab a task reference and drop the proc reference before making any upcalls.
1361 	 */
1362 	task_reference(task_read);
1363 
1364 	proc_rele(proc);
1365 	proc = PROC_NULL;
1366 
1367 #if CONFIG_MACF
1368 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1369 	if (error) {
1370 		error = EPERM;
1371 		goto trfpout;
1372 	}
1373 #endif
1374 
1375 	/* If we aren't root and target's task access port is set... */
1376 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1377 	    !is_current_proc &&
1378 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
1379 	    (tfpport != IPC_PORT_NULL)) {
1380 		if (tfpport == IPC_PORT_DEAD) {
1381 			error = EACCES;
1382 			goto trfpout;
1383 		}
1384 
1385 
1386 		/* Call up to the task access server */
1387 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1388 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1389 
1390 		if (error != MACH_MSG_SUCCESS) {
1391 			if (error == MACH_RCV_INTERRUPTED) {
1392 				error = EINTR;
1393 			} else {
1394 				error = EPERM;
1395 			}
1396 			goto trfpout;
1397 		}
1398 	}
1399 
1400 	/* Check if the task has been corpsified */
1401 	if (task_is_a_corpse(task_read)) {
1402 		error = EACCES;
1403 		goto trfpout;
1404 	}
1405 
1406 	/* could be IP_NULL, consumes a ref */
1407 	sright = (void*) convert_task_read_to_port(task_read);
1408 	task_read = TASK_READ_NULL;
1409 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1410 
1411 trfpout:
1412 	task_deallocate(t1);
1413 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1414 	if (proc != PROC_NULL) {
1415 		proc_rele(proc);
1416 	}
1417 	if (tfpport != IPC_PORT_NULL) {
1418 		ipc_port_release_send(tfpport);
1419 	}
1420 	if (task_read != TASK_READ_NULL) {
1421 		task_deallocate(task_read);
1422 	}
1423 
1424 	*ret = error;
1425 	return error;
1426 }
1427 
/*
 * pid_suspend: suspend the task belonging to args->pid.
 * Returns 0, or EPERM/ESRCH/EACCES/EINTR/EINVAL via *ret and the return value.
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never suspend pid 0 (kernel) */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* POSIX check may be bypassed with the suspend/resume entitlement */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	/* NOTE(review): target is not NULL-checked here; presumably a found,
	 * live proc always has a task — confirm against proc_task() contract. */
	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Tell the memorystatus subsystem the proc is now suspended */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1519 
/*
 * debug_control_port_for_pid: return the debug control port for the task
 * named by args->pid.  Follows the same permission flow as task_for_pid,
 * except that holders of DEBUG_PORT_ENTITLEMENT skip both the MACF check
 * and the task access (taskgated) upcall.
 */
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	ipc_port_t              sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0: never hand out a port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* The caller must present a valid task port for itself */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Stable identity for the MACF check after the proc ref is dropped */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* Basic POSIX uid/gid based permission check */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* Proc has no task: succeed with tret == MACH_PORT_NULL */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	/* The debug-port entitlement bypasses MACF and the taskgated upcall */
	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Always copy out the port name (MACH_PORT_NULL on failure) */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1654 
/*
 * pid_resume: resume the task belonging to args->pid (inverse of pid_suspend).
 * Returns 0, or EPERM/ESRCH/EACCES/EINTR/EINVAL/EIO via *ret and the return value.
 */
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never resume pid 0 (kernel) */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* POSIX check may be bypassed with the suspend/resume entitlement */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* On embedded, un-defunct the target's sockets before resuming it */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	/* Tell the memorystatus subsystem the proc is being resumed */
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				/* Resume failed with a memory error: kill the target and report EIO */
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1758 
1759 #if !XNU_TARGET_OS_OSX
/*
 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
 * When a process is specified, this call is blocking, otherwise we wake up the
 * freezer thread and do not block on a process being frozen.
 *
 * pid >= 0: freeze that process synchronously.
 * pid == -1: notify memorystatus of inactivity (non-blocking freezer kick).
 * pid == -2: page out anonymous pages directly.
 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int     error = 0;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	//Note that targetproc may be null (for the pid < 0 sentinel cases)
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		/* Blocks until the named process has been frozen (or fails) */
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1821 #endif /* !XNU_TARGET_OS_OSX */
1822 
1823 #if SOCKETS
/*
 * Notify memstatus-aware networking descriptors owned by `p` — NECP fds
 * and Skywalk channels — of a memory-status change (`status`).
 */
int
networking_memstatus_callout(proc_t p, uint32_t status)
{
	struct fileproc *fp;

	/*
	 * proc list lock NOT held
	 * proc lock NOT held
	 * a reference on the proc has been held / shall be dropped by the caller.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/* Hold the fd table steady while we walk it */
	proc_fdlock(p);

	fdt_foreach(fp, p) {
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
#if NECP
		case DTYPE_NETPOLICY:
			necp_fd_memstatus(p, status,
			    (struct necp_fd_data *)fp_get_data(fp));
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			kern_channel_memstatus(p, status,
			    (struct kern_channel *)fp_get_data(fp));
			break;
#endif /* SKYWALK */
		default:
			/* Not a networking descriptor; nothing to notify */
			break;
		}
	}
	proc_fdunlock(p);

	return 1;
}
1861 
1862 #if SKYWALK
1863 /*
1864  * Since we make multiple passes across the fileproc array, record the
1865  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases.  If we find more,
1867  * we'll go to the slow path during second pass.
1868  */
1869 #define MAX_CHANNELS    8       /* should be more than enough */
1870 #endif /* SKYWALK */
1871 
/*
 * proc_iterate() callback for pid_shutdown_sockets: defunct all networking
 * state in `p` that belongs to (or is delegated to) the pid carried in
 * `arg` (a struct pid_shutdown_sockets_args).  Returns PROC_RETURNED so
 * the iteration continues over every process.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	/* Hold the fd table steady while we walk it */
	proc_fdlock(p);

	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match sockets owned by, last used by, or delegated to the pid */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				/* Record up to MAX_CHANNELS; count all regardless */
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP).  Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* Slow path: more channels than the cache holds; re-walk the table */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1956 
/*
 * pid_shutdown_sockets: defunct networking state associated with args->pid
 * at the requested disconnect level.
 * Returns 0, or EINVAL/ESRCH/EPERM via *ret and the return value.
 */
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int                             error = 0;
	proc_t                          targetproc = PROC_NULL;
	int                             pid = args->pid;
	int                             level = args->level;

	/* Only the two defined disconnect levels are accepted */
	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* POSIX check may be bypassed with the suspend/resume entitlement */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * Walk every process: sockets last used by or delegated to the
	 * target pid may live in other processes' fd tables (see
	 * networking_defunct_callout).
	 */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
	    networking_defunct_callout, args, NULL, NULL);

out:
	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
2001 
2002 #endif /* SOCKETS */
2003 
2004 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)2005 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
2006     __unused int arg2, struct sysctl_req *req)
2007 {
2008 	int error = 0;
2009 	int new_value;
2010 
2011 	error = SYSCTL_OUT(req, arg1, sizeof(int));
2012 	if (error || req->newptr == USER_ADDR_NULL) {
2013 		return error;
2014 	}
2015 
2016 	if (!kauth_cred_issuser(kauth_cred_get())) {
2017 		return EPERM;
2018 	}
2019 
2020 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2021 		goto out;
2022 	}
2023 	if ((new_value == KERN_TFP_POLICY_DENY)
2024 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2025 		tfp_policy = new_value;
2026 	} else {
2027 		error = EINVAL;
2028 	}
2029 out:
2030 	return error;
2031 }
2032 
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

/* kern.secure_kernel: read-only flag, 1 when built with SECURE_KERNEL */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy; writes gated by sysctl_settfp_policy (root only) */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: tracing level (RW), version (RO), persistence (RW) */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2051 
2052 /*
2053  * shared_region_check_np:
2054  *
2055  * This system call is intended for dyld.
2056  *
2057  * dyld calls this when any process starts to see if the process's shared
2058  * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
2061  * dyld will then check what's mapped at that address.
2062  *
2063  * If the shared region is empty, dyld will then attempt to map the shared
2064  * cache file in the shared region via the shared_region_map_np() system call.
2065  *
2066  * If something's already mapped in the shared region, dyld will check if it
2067  * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2069  * shared region.
2070  * If it doesn't match, dyld will unmap the shared region and map the shared
2071  * cache into the process's address space via mmap().
2072  *
2073  * A NULL pointer argument can be used by dyld to indicate it has unmapped
2074  * the shared region. We will remove the shared_region reference from the task.
2075  *
2076  * ERROR VALUES
2077  * EINVAL	no shared region
2078  * ENOMEM	shared region is empty
2079  * EFAULT	bad address for "start_address"
2080  */
int
shared_region_check_np(
	__unused struct proc                    *p,
	struct shared_region_check_np_args      *uap,
	__unused int                            *retvalp)
{
	vm_shared_region_t      shared_region;
	mach_vm_offset_t        start_address = 0;
	int                     error = 0;
	kern_return_t           kr;
	task_t                  task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process.  The sentinel is compared truncated to
	 * 32 bits for tasks without a 64-bit address space.
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					/* uap->start_address doubles as the user buffer to fill in */
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		/* drop the reference taken by vm_shared_region_get() */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2179 
2180 
2181 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2182 shared_region_copyin(
2183 	struct proc  *p,
2184 	user_addr_t  user_addr,
2185 	unsigned int count,
2186 	unsigned int element_size,
2187 	void         *kernel_data)
2188 {
2189 	int             error = 0;
2190 	vm_size_t       size = count * element_size;
2191 
2192 	error = copyin(user_addr, kernel_data, size);
2193 	if (error) {
2194 		SHARED_REGION_TRACE_ERROR(
2195 			("shared_region: %p [%d(%s)] map(): "
2196 			"copyin(0x%llx, %ld) failed (error=%d)\n",
2197 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2198 			proc_getpid(p), p->p_comm,
2199 			(uint64_t)user_addr, (long)size, error));
2200 	}
2201 	return error;
2202 }
2203 
/*
 * A reasonable upper limit on the number of files per request, to prevent
 * overflow of the allocation/copyin sizes derived from it.
 */
#define _SR_FILE_MAPPINGS_MAX_FILES 256

/* forward declaration, so the setup path can clean up after itself on error */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
	struct proc              *p,
	uint32_t                 files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region  *shared_region);
2216 
2217 /*
2218  * Setup part of _shared_region_map_and_slide().
2219  * It had to be broken out of _shared_region_map_and_slide() to
2220  * prevent compiler inlining from blowing out the stack.
2221  */
2222 __attribute__((noinline))
2223 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode * rdir_vp)2224 shared_region_map_and_slide_setup(
2225 	struct proc                         *p,
2226 	uint32_t                            files_count,
2227 	struct shared_file_np               *files,
2228 	uint32_t                            mappings_count,
2229 	struct shared_file_mapping_slide_np *mappings,
2230 	struct _sr_file_mappings            **sr_file_mappings,
2231 	struct vm_shared_region             **shared_region_ptr,
2232 	struct vnode                        *rdir_vp)
2233 {
2234 	int                             error = 0;
2235 	struct _sr_file_mappings        *srfmp;
2236 	uint32_t                        mappings_next;
2237 	struct vnode_attr               va;
2238 	off_t                           fs;
2239 #if CONFIG_MACF
2240 	vm_prot_t                       maxprot = VM_PROT_ALL;
2241 #endif
2242 	uint32_t                        i;
2243 	struct vm_shared_region         *shared_region = NULL;
2244 	boolean_t                       is_driverkit = task_is_driver(current_task());
2245 
2246 	SHARED_REGION_TRACE_DEBUG(
2247 		("shared_region: %p [%d(%s)] -> map\n",
2248 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2249 		proc_getpid(p), p->p_comm));
2250 
2251 	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2252 		error = E2BIG;
2253 		goto done;
2254 	}
2255 	if (files_count == 0) {
2256 		error = EINVAL;
2257 		goto done;
2258 	}
2259 	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2260 	    Z_WAITOK | Z_ZERO);
2261 	if (*sr_file_mappings == NULL) {
2262 		error = ENOMEM;
2263 		goto done;
2264 	}
2265 	mappings_next = 0;
2266 	for (i = 0; i < files_count; i++) {
2267 		srfmp = &(*sr_file_mappings)[i];
2268 		srfmp->fd = files[i].sf_fd;
2269 		srfmp->mappings_count = files[i].sf_mappings_count;
2270 		srfmp->mappings = &mappings[mappings_next];
2271 		mappings_next += srfmp->mappings_count;
2272 		if (mappings_next > mappings_count) {
2273 			error = EINVAL;
2274 			goto done;
2275 		}
2276 		srfmp->slide = files[i].sf_slide;
2277 	}
2278 
2279 	/* get the process's shared region (setup in vm_map_exec()) */
2280 	shared_region = vm_shared_region_trim_and_get(current_task());
2281 	*shared_region_ptr = shared_region;
2282 	if (shared_region == NULL) {
2283 		SHARED_REGION_TRACE_ERROR(
2284 			("shared_region: %p [%d(%s)] map(): "
2285 			"no shared region\n",
2286 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2287 			proc_getpid(p), p->p_comm));
2288 		error = EINVAL;
2289 		goto done;
2290 	}
2291 
2292 	/*
2293 	 * Check the shared region matches the current root
2294 	 * directory of this process.  Deny the mapping to
2295 	 * avoid tainting the shared region with something that
2296 	 * doesn't quite belong into it.
2297 	 */
2298 	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2299 	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2300 		SHARED_REGION_TRACE_ERROR(
2301 			("shared_region: map(%p) root_dir mismatch\n",
2302 			(void *)VM_KERNEL_ADDRPERM(current_thread())));
2303 		error = EPERM;
2304 		goto done;
2305 	}
2306 
2307 
2308 	for (srfmp = &(*sr_file_mappings)[0];
2309 	    srfmp < &(*sr_file_mappings)[files_count];
2310 	    srfmp++) {
2311 		if (srfmp->mappings_count == 0) {
2312 			/* no mappings here... */
2313 			continue;
2314 		}
2315 
2316 		/*
2317 		 * A file descriptor of -1 is used to indicate that the data
2318 		 * to be put in the shared region for this mapping comes directly
2319 		 * from the processes address space. Ensure we have proper alignments.
2320 		 */
2321 		if (srfmp->fd == -1) {
2322 			/* only allow one mapping per fd */
2323 			if (srfmp->mappings_count > 1) {
2324 				SHARED_REGION_TRACE_ERROR(
2325 					("shared_region: %p [%d(%s)] map data >1 mapping\n",
2326 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2327 					proc_getpid(p), p->p_comm));
2328 				error = EINVAL;
2329 				goto done;
2330 			}
2331 
2332 			/*
2333 			 * The destination address and size must be page aligned.
2334 			 */
2335 			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2336 			mach_vm_address_t dest_addr = mapping->sms_address;
2337 			mach_vm_size_t    map_size = mapping->sms_size;
2338 			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2339 				SHARED_REGION_TRACE_ERROR(
2340 					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2341 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2342 					proc_getpid(p), p->p_comm, dest_addr));
2343 				error = EINVAL;
2344 				goto done;
2345 			}
2346 			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2347 				SHARED_REGION_TRACE_ERROR(
2348 					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2349 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2350 					proc_getpid(p), p->p_comm, map_size));
2351 				error = EINVAL;
2352 				goto done;
2353 			}
2354 			continue;
2355 		}
2356 
2357 		/* get file structure from file descriptor */
2358 		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2359 		if (error) {
2360 			SHARED_REGION_TRACE_ERROR(
2361 				("shared_region: %p [%d(%s)] map: "
2362 				"fd=%d lookup failed (error=%d)\n",
2363 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2364 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2365 			goto done;
2366 		}
2367 
2368 		/* we need at least read permission on the file */
2369 		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2370 			SHARED_REGION_TRACE_ERROR(
2371 				("shared_region: %p [%d(%s)] map: "
2372 				"fd=%d not readable\n",
2373 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2374 				proc_getpid(p), p->p_comm, srfmp->fd));
2375 			error = EPERM;
2376 			goto done;
2377 		}
2378 
2379 		/* get vnode from file structure */
2380 		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2381 		if (error) {
2382 			SHARED_REGION_TRACE_ERROR(
2383 				("shared_region: %p [%d(%s)] map: "
2384 				"fd=%d getwithref failed (error=%d)\n",
2385 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2386 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2387 			goto done;
2388 		}
2389 		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2390 
2391 		/* make sure the vnode is a regular file */
2392 		if (srfmp->vp->v_type != VREG) {
2393 			SHARED_REGION_TRACE_ERROR(
2394 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2395 				"not a file (type=%d)\n",
2396 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2397 				proc_getpid(p), p->p_comm,
2398 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2399 				srfmp->vp->v_name, srfmp->vp->v_type));
2400 			error = EINVAL;
2401 			goto done;
2402 		}
2403 
2404 #if CONFIG_MACF
2405 		/* pass in 0 for the offset argument because AMFI does not need the offset
2406 		 *       of the shared cache */
2407 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2408 		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2409 		if (error) {
2410 			goto done;
2411 		}
2412 #endif /* MAC */
2413 
2414 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2415 		/*
2416 		 * Check if the shared cache is in the trust cache;
2417 		 * if so, we can skip the root ownership check.
2418 		 */
2419 #if DEVELOPMENT || DEBUG
2420 		/*
2421 		 * Skip both root ownership and trust cache check if
2422 		 * enforcement is disabled.
2423 		 */
2424 		if (!cs_system_enforcement()) {
2425 			goto after_root_check;
2426 		}
2427 #endif /* DEVELOPMENT || DEBUG */
2428 		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2429 		if (blob == NULL) {
2430 			SHARED_REGION_TRACE_ERROR(
2431 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2432 				"missing CS blob\n",
2433 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2434 				proc_getpid(p), p->p_comm,
2435 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2436 				srfmp->vp->v_name));
2437 			goto root_check;
2438 		}
2439 		const uint8_t *cdhash = csblob_get_cdhash(blob);
2440 		if (cdhash == NULL) {
2441 			SHARED_REGION_TRACE_ERROR(
2442 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2443 				"missing cdhash\n",
2444 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2445 				proc_getpid(p), p->p_comm,
2446 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2447 				srfmp->vp->v_name));
2448 			goto root_check;
2449 		}
2450 
2451 		bool in_trust_cache = false;
2452 		TrustCacheQueryToken_t qt;
2453 		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2454 			TCType_t tc_type = kTCTypeInvalid;
2455 			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2456 			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2457 			    (tc_type == kTCTypeCryptex1BootOS ||
2458 			    tc_type == kTCTypeStatic ||
2459 			    tc_type == kTCTypeEngineering));
2460 		}
2461 		if (!in_trust_cache) {
2462 			SHARED_REGION_TRACE_ERROR(
2463 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2464 				"not in trust cache\n",
2465 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2466 				proc_getpid(p), p->p_comm,
2467 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2468 				srfmp->vp->v_name));
2469 			goto root_check;
2470 		}
2471 		goto after_root_check;
2472 root_check:
2473 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2474 
2475 		/* The shared cache file must be owned by root */
2476 		VATTR_INIT(&va);
2477 		VATTR_WANTED(&va, va_uid);
2478 		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2479 		if (error) {
2480 			SHARED_REGION_TRACE_ERROR(
2481 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2482 				"vnode_getattr(%p) failed (error=%d)\n",
2483 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2484 				proc_getpid(p), p->p_comm,
2485 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2486 				srfmp->vp->v_name,
2487 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2488 				error));
2489 			goto done;
2490 		}
2491 		if (va.va_uid != 0) {
2492 			SHARED_REGION_TRACE_ERROR(
2493 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2494 				"owned by uid=%d instead of 0\n",
2495 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2496 				proc_getpid(p), p->p_comm,
2497 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2498 				srfmp->vp->v_name, va.va_uid));
2499 			error = EPERM;
2500 			goto done;
2501 		}
2502 
2503 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2504 after_root_check:
2505 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2506 
2507 #if CONFIG_CSR
2508 		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2509 			VATTR_INIT(&va);
2510 			VATTR_WANTED(&va, va_flags);
2511 			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2512 			if (error) {
2513 				SHARED_REGION_TRACE_ERROR(
2514 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2515 					"vnode_getattr(%p) failed (error=%d)\n",
2516 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2517 					proc_getpid(p), p->p_comm,
2518 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2519 					srfmp->vp->v_name,
2520 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2521 					error));
2522 				goto done;
2523 			}
2524 
2525 			if (!(va.va_flags & SF_RESTRICTED)) {
2526 				/*
2527 				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2528 				 * the shared cache file is NOT SIP-protected, so reject the
2529 				 * mapping request
2530 				 */
2531 				SHARED_REGION_TRACE_ERROR(
2532 					("shared_region: %p [%d(%s)] map(%p:'%s'), "
2533 					"vnode is not SIP-protected. \n",
2534 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2535 					proc_getpid(p), p->p_comm,
2536 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2537 					srfmp->vp->v_name));
2538 				error = EPERM;
2539 				goto done;
2540 			}
2541 		}
2542 #else /* CONFIG_CSR */
2543 
2544 		/*
2545 		 * Devices without SIP/ROSP need to make sure that the shared cache
2546 		 * is either on the root volume or in the preboot cryptex volume.
2547 		 */
2548 		assert(rdir_vp != NULL);
2549 		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2550 			vnode_t preboot_vp = NULL;
2551 #if XNU_TARGET_OS_OSX
2552 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2553 #else
2554 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2555 #endif
2556 			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2557 			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2558 				SHARED_REGION_TRACE_ERROR(
2559 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2560 					"not on process' root volume nor preboot volume\n",
2561 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2562 					proc_getpid(p), p->p_comm,
2563 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2564 					srfmp->vp->v_name));
2565 				error = EPERM;
2566 				if (preboot_vp) {
2567 					(void)vnode_put(preboot_vp);
2568 				}
2569 				goto done;
2570 			} else if (preboot_vp) {
2571 				(void)vnode_put(preboot_vp);
2572 			}
2573 		}
2574 #endif /* CONFIG_CSR */
2575 
2576 		if (scdir_enforce) {
2577 			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2578 			struct vnode *scdir_vp = NULL;
2579 			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2580 			    *expected_scdir_path != NULL;
2581 			    expected_scdir_path++) {
2582 				/* get vnode for expected_scdir_path */
2583 				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2584 				if (error) {
2585 					SHARED_REGION_TRACE_ERROR(
2586 						("shared_region: %p [%d(%s)]: "
2587 						"vnode_lookup(%s) failed (error=%d)\n",
2588 						(void *)VM_KERNEL_ADDRPERM(current_thread()),
2589 						proc_getpid(p), p->p_comm,
2590 						*expected_scdir_path, error));
2591 					continue;
2592 				}
2593 
2594 				/* check if parent is scdir_vp */
2595 				assert(scdir_vp != NULL);
2596 				if (vnode_parent(srfmp->vp) == scdir_vp) {
2597 					(void)vnode_put(scdir_vp);
2598 					scdir_vp = NULL;
2599 					goto scdir_ok;
2600 				}
2601 				(void)vnode_put(scdir_vp);
2602 				scdir_vp = NULL;
2603 			}
2604 			/* nothing matches */
2605 			SHARED_REGION_TRACE_ERROR(
2606 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2607 				"shared cache file not in expected directory\n",
2608 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2609 				proc_getpid(p), p->p_comm,
2610 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2611 				srfmp->vp->v_name));
2612 			error = EPERM;
2613 			goto done;
2614 		}
2615 scdir_ok:
2616 
2617 		/* get vnode size */
2618 		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2619 		if (error) {
2620 			SHARED_REGION_TRACE_ERROR(
2621 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2622 				"vnode_size(%p) failed (error=%d)\n",
2623 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2624 				proc_getpid(p), p->p_comm,
2625 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2626 				srfmp->vp->v_name,
2627 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2628 			goto done;
2629 		}
2630 		srfmp->file_size = fs;
2631 
2632 		/* get the file's memory object handle */
2633 		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2634 		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2635 			SHARED_REGION_TRACE_ERROR(
2636 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2637 				"no memory object\n",
2638 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2639 				proc_getpid(p), p->p_comm,
2640 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2641 				srfmp->vp->v_name));
2642 			error = EINVAL;
2643 			goto done;
2644 		}
2645 
2646 		/* check that the mappings are properly covered by code signatures */
2647 		if (!cs_system_enforcement()) {
2648 			/* code signing is not enforced: no need to check */
2649 		} else {
2650 			for (i = 0; i < srfmp->mappings_count; i++) {
2651 				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2652 					/* zero-filled mapping: not backed by the file */
2653 					continue;
2654 				}
2655 				if (ubc_cs_is_range_codesigned(srfmp->vp,
2656 				    srfmp->mappings[i].sms_file_offset,
2657 				    srfmp->mappings[i].sms_size)) {
2658 					/* this mapping is fully covered by code signatures */
2659 					continue;
2660 				}
2661 				SHARED_REGION_TRACE_ERROR(
2662 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2663 					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2664 					"is not code-signed\n",
2665 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2666 					proc_getpid(p), p->p_comm,
2667 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2668 					srfmp->vp->v_name,
2669 					i, srfmp->mappings_count,
2670 					srfmp->mappings[i].sms_address,
2671 					srfmp->mappings[i].sms_size,
2672 					srfmp->mappings[i].sms_file_offset,
2673 					srfmp->mappings[i].sms_max_prot,
2674 					srfmp->mappings[i].sms_init_prot));
2675 				error = EINVAL;
2676 				goto done;
2677 			}
2678 		}
2679 	}
2680 done:
2681 	if (error != 0) {
2682 		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2683 		*sr_file_mappings = NULL;
2684 		*shared_region_ptr = NULL;
2685 	}
2686 	return error;
2687 }
2688 
2689 /*
 * _shared_region_map_and_slide()
2691  *
2692  * This system call is intended for dyld.
2693  *
2694  * dyld uses this to map a shared cache file into a shared region.
2695  * This is usually done only the first time a shared cache is needed.
2696  * Subsequent processes will just use the populated shared region without
2697  * requiring any further setup.
2698  */
/*
 * Map the validated file list into the current task's shared region.
 * Called from shared_region_map_and_slide_2_np() after the user arrays
 * have been copied in.  Returns an errno (0 on success).
 */
static int
_shared_region_map_and_slide(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int                             error = 0;
	kern_return_t                   kr = KERN_SUCCESS;
	struct _sr_file_mappings        *sr_file_mappings = NULL;
	struct vnode                    *rdir_vp = NULL;
	struct vm_shared_region         *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		/* setup already cleaned up after itself on failure */
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	/* drop the root dir reference and release all setup state */
	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
2784 
2785 /*
2786  * Clean up part of _shared_region_map_and_slide()
2787  * It had to be broken out of _shared_region_map_and_slide() to
2788  * prevent compiler inlining from blowing out the stack.
2789  */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc              *p,
	uint32_t                 files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region  *shared_region)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr        va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				/* mark the vnode as backing a dyld shared cache */
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated
				 * for it. That namedstream vnode will also
				 * have to be marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		/* free the per-file array allocated by the setup path */
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}
}
2854 
2855 
/*
 * For each file mapped, we may have mappings for:
 *    TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
 * so let's round up to 8 mappings per file.
 */
#define SFM_MAX       (_SR_FILE_MAPPINGS_MAX_FILES * 8)     /* max mapping structs allowed to pass in */

/*
 * This is the new interface for setting up shared region mappings.
 *
 * The slide used for shared regions setup using this interface is done differently
 * from the old interface. The slide value passed in the shared_files_np represents
 * a max value. The kernel will choose a random value based on that, then use it
 * for all shared regions.
 */
/* mask that rounds the chosen slide down to the platform page size (4K on x86_64, 16K elsewhere) */
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
#endif
2876 
2877 int
shared_region_map_and_slide_2_np(struct proc * p,struct shared_region_map_and_slide_2_np_args * uap,__unused int * retvalp)2878 shared_region_map_and_slide_2_np(
2879 	struct proc                                  *p,
2880 	struct shared_region_map_and_slide_2_np_args *uap,
2881 	__unused int                                 *retvalp)
2882 {
2883 	unsigned int                  files_count;
2884 	struct shared_file_np         *shared_files = NULL;
2885 	unsigned int                  mappings_count;
2886 	struct shared_file_mapping_slide_np *mappings = NULL;
2887 	kern_return_t                 kr = KERN_SUCCESS;
2888 
2889 	files_count = uap->files_count;
2890 	mappings_count = uap->mappings_count;
2891 
2892 	if (files_count == 0) {
2893 		SHARED_REGION_TRACE_INFO(
2894 			("shared_region: %p [%d(%s)] map(): "
2895 			"no files\n",
2896 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2897 			proc_getpid(p), p->p_comm));
2898 		kr = 0; /* no files to map: we're done ! */
2899 		goto done;
2900 	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
2901 		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
2902 		if (shared_files == NULL) {
2903 			kr = KERN_RESOURCE_SHORTAGE;
2904 			goto done;
2905 		}
2906 	} else {
2907 		SHARED_REGION_TRACE_ERROR(
2908 			("shared_region: %p [%d(%s)] map(): "
2909 			"too many files (%d) max %d\n",
2910 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2911 			proc_getpid(p), p->p_comm,
2912 			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
2913 		kr = KERN_FAILURE;
2914 		goto done;
2915 	}
2916 
2917 	if (mappings_count == 0) {
2918 		SHARED_REGION_TRACE_INFO(
2919 			("shared_region: %p [%d(%s)] map(): "
2920 			"no mappings\n",
2921 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2922 			proc_getpid(p), p->p_comm));
2923 		kr = 0; /* no mappings: we're done ! */
2924 		goto done;
2925 	} else if (mappings_count <= SFM_MAX) {
2926 		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
2927 		if (mappings == NULL) {
2928 			kr = KERN_RESOURCE_SHORTAGE;
2929 			goto done;
2930 		}
2931 	} else {
2932 		SHARED_REGION_TRACE_ERROR(
2933 			("shared_region: %p [%d(%s)] map(): "
2934 			"too many mappings (%d) max %d\n",
2935 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2936 			proc_getpid(p), p->p_comm,
2937 			mappings_count, SFM_MAX));
2938 		kr = KERN_FAILURE;
2939 		goto done;
2940 	}
2941 
2942 	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
2943 	if (kr != KERN_SUCCESS) {
2944 		goto done;
2945 	}
2946 
2947 	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
2948 	if (kr != KERN_SUCCESS) {
2949 		goto done;
2950 	}
2951 
2952 	uint32_t max_slide = shared_files[0].sf_slide;
2953 	uint32_t random_val;
2954 	uint32_t slide_amount;
2955 
2956 	if (max_slide != 0) {
2957 		read_random(&random_val, sizeof random_val);
2958 		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
2959 	} else {
2960 		slide_amount = 0;
2961 	}
2962 #if DEVELOPMENT || DEBUG
2963 	extern bool bootarg_disable_aslr;
2964 	if (bootarg_disable_aslr) {
2965 		slide_amount = 0;
2966 	}
2967 #endif /* DEVELOPMENT || DEBUG */
2968 
2969 	/*
2970 	 * Fix up the mappings to reflect the desired slide.
2971 	 */
2972 	unsigned int f;
2973 	unsigned int m = 0;
2974 	unsigned int i;
2975 	for (f = 0; f < files_count; ++f) {
2976 		shared_files[f].sf_slide = slide_amount;
2977 		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
2978 			if (m >= mappings_count) {
2979 				SHARED_REGION_TRACE_ERROR(
2980 					("shared_region: %p [%d(%s)] map(): "
2981 					"mapping count argument was too small\n",
2982 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2983 					proc_getpid(p), p->p_comm));
2984 				kr = KERN_FAILURE;
2985 				goto done;
2986 			}
2987 			mappings[m].sms_address += slide_amount;
2988 			if (mappings[m].sms_slide_size != 0) {
2989 				mappings[m].sms_slide_start += slide_amount;
2990 			}
2991 		}
2992 	}
2993 
2994 	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
2995 done:
2996 	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
2997 	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
2998 	return kr;
2999 }
3000 
/*
 * A syscall for dyld to use to map data pages that need load time relocation fixups.
 * The fixups are performed by a custom pager during page-in, so the pages still appear
 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
 * on demand later, all w/o using the compressor.
 *
 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
 * running, they are COW'd as normal.
 *
 * All regions must refer to the same file descriptor. On success the link_info
 * buffer is handed off to the pager (via vm_map_with_linking()) and must NOT be
 * freed here; on failure it is freed before returning.
 */
int
map_with_linking_np(
	struct proc                     *p,
	struct map_with_linking_np_args *uap,
	__unused int                    *retvalp)
{
	uint32_t                        region_count;
	uint32_t                        r;
	struct mwl_region               *regions = NULL;
	struct mwl_region               *rp;
	uint32_t                        link_info_size;
	void                            *link_info = NULL;      /* starts with a struct mwl_info_hdr */
	struct mwl_info_hdr             *info_hdr = NULL;
	uint64_t                        binds_size;
	int                             fd;
	struct fileproc                 *fp = NULL;
	struct vnode                    *vp = NULL;
	size_t                          file_size;              /* NOTE(review): assigned below but not read in this function */
	off_t                           fs;
	struct vnode_attr               va;
	memory_object_control_t         file_control = NULL;
	int                             error;
	kern_return_t                   kr = KERN_SUCCESS;

	/*
	 * Check if dyld has told us it finished with this call.
	 */
	if (p->p_disallow_map_with_linking) {
		printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * First we do some sanity checking on what dyld has passed us.
	 */
	region_count = uap->region_count;
	link_info_size = uap->link_info_size;
	if (region_count == 0) {
		printf("%s: [%d(%s)]: region_count == 0\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}
	if (region_count > MWL_MAX_REGION_COUNT) {
		printf("%s: [%d(%s)]: region_count too big %d\n",
		    __func__, proc_getpid(p), p->p_comm, region_count);
		kr = KERN_FAILURE;
		goto done;
	}

	/* link_info_size is bounded on both sides before we trust it as an allocation size */
	if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
		printf("%s: [%d(%s)]: link_info_size too small\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}
	if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
		printf("%s: [%d(%s)]: link_info_size too big %d\n",
		    __func__, proc_getpid(p), p->p_comm, link_info_size);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * Allocate and copyin the regions and link info
	 */
	regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
	if (regions == NULL) {
		printf("%s: [%d(%s)]: failed to allocate regions\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}
	kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
	if (kr != KERN_SUCCESS) {
		printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
		    __func__, proc_getpid(p), p->p_comm, kr);
		goto done;
	}

	link_info = kalloc_data(link_info_size, Z_WAITOK);
	if (link_info == NULL) {
		printf("%s: [%d(%s)]: failed to allocate link_info\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}
	kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
	if (kr != KERN_SUCCESS) {
		printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
		    __func__, proc_getpid(p), p->p_comm, kr);
		goto done;
	}

	/*
	 * Do some verification the data structures.
	 * All offsets/sizes inside the header are untrusted user input and are
	 * bounds-checked against link_info_size before use.
	 */
	info_hdr = (struct mwl_info_hdr *)link_info;
	if (info_hdr->mwli_version != MWL_INFO_VERS) {
		printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
		kr = KERN_FAILURE;
		goto done;
	}

	if (info_hdr->mwli_binds_offset > link_info_size) {
		printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
		kr = KERN_FAILURE;
		goto done;
	}

	/* some older devs have s/w page size > h/w page size, no need to support them */
	if (info_hdr->mwli_page_size != PAGE_SIZE) {
		/* no printf, since this is expected on some devices */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/*
	 * binds are 4 or 8 bytes each depending on the pointer format; compute
	 * in 64 bits so a huge mwli_binds_count cannot overflow the product.
	 */
	binds_size = (uint64_t)info_hdr->mwli_binds_count *
	    ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
	if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
		printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
		kr = KERN_FAILURE;
		goto done;
	}

	if (info_hdr->mwli_chains_offset > link_info_size) {
		printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
		kr = KERN_FAILURE;
		goto done;
	}


	/*
	 * Ensure the chained starts in the link info and make sure the
	 * segment info offsets are within bounds.
	 */
	if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
		printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
		kr = KERN_FAILURE;
		goto done;
	}
	if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
		printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
		kr = KERN_FAILURE;
		goto done;
	}

	/* Note that more verification of offsets is done in the pager itself */

	/*
	 * Ensure we've only been given one FD and verify valid protections.
	 */
	fd = regions[0].mwlr_fd;
	for (r = 0; r < region_count; ++r) {
		if (regions[r].mwlr_fd != fd) {
			printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
			    __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * Only allow data mappings and not zero fill. Permit TPRO
		 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
		 */
		if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
			printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
			    __func__, proc_getpid(p), p->p_comm);
			kr = KERN_FAILURE;
			goto done;
		}
		if (regions[r].mwlr_protections & VM_PROT_ZF) {
			printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
			    __func__, proc_getpid(p), p->p_comm, r);
			kr = KERN_FAILURE;
			goto done;
		}
		if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
		    !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
			printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
			    __func__, proc_getpid(p), p->p_comm, r);
			kr = KERN_FAILURE;
			goto done;
		}
	}


	/* get file structure from file descriptor */
	error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
	if (error) {
		printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
		    __func__, proc_getpid(p), p->p_comm, error);
		kr = KERN_FAILURE;
		goto done;
	}

	/* We need at least read permission on the file */
	if (!(fp->fp_glob->fg_flag & FREAD)) {
		printf("%s: [%d(%s)]: not readable\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* Get the vnode from file structure */
	vp = (struct vnode *)fp_get_data(fp);
	error = vnode_getwithref(vp);
	if (error) {
		printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
		    __func__, proc_getpid(p), p->p_comm, error);
		kr = KERN_FAILURE;
		vp = NULL; /* just to be sure */
		goto done;
	}

	/* Make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		printf("%s: [%d(%s)]: vnode not VREG\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		printf("%s: [%d(%s)]: no memory object\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* per-region security checks: MACF mmap policy and code-signing coverage */
	for (r = 0; r < region_count; ++r) {
		rp = &regions[r];

#if CONFIG_MACF
		vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
		if (error) {
			printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
			    __func__, proc_getpid(p), p->p_comm, r, error);
			kr = KERN_FAILURE;
			goto done;
		}
#endif /* MAC */

		/* check that the mappings are properly covered by code signatures */
		if (cs_system_enforcement()) {
			if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
				printf("%s: [%d(%s)]: region %d, not code signed\n",
				    __func__, proc_getpid(p), p->p_comm, r);
				kr = KERN_FAILURE;
				goto done;
			}
		}
	}

	/* update the vnode's access time */
	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	/* get the VM to do the work */
	kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);

done:
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
	}
	if (vp != NULL) {
		(void)vnode_put(vp);
	}
	if (regions != NULL) {
		kfree_data(regions, region_count * sizeof(regions[0]));
	}
	/* link info is used in the pager if things worked */
	if (link_info != NULL && kr != KERN_SUCCESS) {
		kfree_data(link_info, link_info_size);
	}

	/* translate kern_return_t into an errno for the syscall return */
	switch (kr) {
	case KERN_SUCCESS:
		return 0;
	case KERN_RESOURCE_SHORTAGE:
		return ENOMEM;
	default:
		return EINVAL;
	}
}
3319 
#if DEBUG || DEVELOPMENT
/* Read-only counters for dyld pagers: current count and high-water mark. */
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */
3326 
/* sysctl overflow room */

/* vm.pagesize: the software page size exported to user space */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 *       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 *       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int     vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3341 
3342 static int
3343 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3344 {
3345 #pragma unused(oidp, arg1, arg2)
3346 	unsigned int page_free_wanted;
3347 
3348 	page_free_wanted = mach_vm_ctl_page_free_wanted();
3349 	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3350 }
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

/* Read-only counters for purgeable memory and kernel large-page usage. */
extern unsigned int     vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int     vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");
3366 
#if DEVELOPMENT || DEBUG
/* Internal-build-only pageout/cleaned-queue diagnostics. */
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");         /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, "");         /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */
3401 
/* Debug knobs: force zero-fill (always / sometimes) on madvise(MADV_FREE*). */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");

/* Read-only counters from vm_page_stats_reusable (reusable/reuse page machinery). */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3441 
3442 
/* Read-only page-queue size counters. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3478 
#if defined (__x86_64__)
/* x86-only: free-page "clump" allocator tuning knob and per-clump-size stats. */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif  /* if DEVELOPMENT || DEBUG */
#endif  /* #if defined (__x86_64__) */
3507 
#if CONFIG_SECLUDED_MEMORY

/* Read-only counters for the secluded-memory pool and its grab paths. */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3534 
3535 #pragma mark Deferred Reclaim
3536 
3537 #if CONFIG_DEFERRED_RECLAIM
3538 
3539 #if DEVELOPMENT || DEBUG
3540 /*
3541  * VM reclaim testing
3542  */
3543 extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
3544 
3545 static int
3546 sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
3547 {
3548 #pragma unused(arg1, arg2)
3549 	int error = EINVAL, pid = 0;
3550 	/*
3551 	 * Only send on write
3552 	 */
3553 	error = sysctl_handle_int(oidp, &pid, 0, req);
3554 	if (error || !req->newptr) {
3555 		return error;
3556 	}
3557 
3558 	bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
3559 	if (success) {
3560 		error = 0;
3561 	}
3562 
3563 	return error;
3564 }
3565 
SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
    &sysctl_vm_reclaim_drain_async_queue, "I", "");


/* Tunables for the deferred-reclaim machinery (internal builds only). */
extern uint64_t vm_reclaim_max_threshold;
extern uint64_t vm_reclaim_trim_divisor;

SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
3576 #endif /* DEVELOPMENT || DEBUG */
3577 
3578 #endif /* CONFIG_DEFERRED_RECLAIM */
3579 
3580 #include <kern/thread.h>
3581 #include <sys/user.h>
3582 
3583 void vm_pageout_io_throttle(void);
3584 
3585 void
vm_pageout_io_throttle(void)3586 vm_pageout_io_throttle(void)
3587 {
3588 	struct uthread *uthread = current_uthread();
3589 
3590 	/*
3591 	 * thread is marked as a low priority I/O type
3592 	 * and the I/O we issued while in this cleaning operation
3593 	 * collided with normal I/O operations... we'll
3594 	 * delay in order to mitigate the impact of this
3595 	 * task on the normal operation of the system
3596 	 */
3597 
3598 	if (uthread->uu_lowpri_window) {
3599 		throttle_lowpri_io(1);
3600 	}
3601 }
3602 
3603 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3604 vm_pressure_monitor(
3605 	__unused struct proc *p,
3606 	struct vm_pressure_monitor_args *uap,
3607 	int *retval)
3608 {
3609 	kern_return_t   kr;
3610 	uint32_t        pages_reclaimed;
3611 	uint32_t        pages_wanted;
3612 
3613 	kr = mach_vm_pressure_monitor(
3614 		(boolean_t) uap->wait_for_pressure,
3615 		uap->nsecs_monitored,
3616 		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3617 		&pages_wanted);
3618 
3619 	switch (kr) {
3620 	case KERN_SUCCESS:
3621 		break;
3622 	case KERN_ABORTED:
3623 		return EINTR;
3624 	default:
3625 		return EINVAL;
3626 	}
3627 
3628 	if (uap->pages_reclaimed) {
3629 		if (copyout((void *)&pages_reclaimed,
3630 		    uap->pages_reclaimed,
3631 		    sizeof(pages_reclaimed)) != 0) {
3632 			return EFAULT;
3633 		}
3634 	}
3635 
3636 	*retval = (int) pages_wanted;
3637 	return 0;
3638 }
3639 
3640 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3641 kas_info(struct proc *p,
3642     struct kas_info_args *uap,
3643     int *retval __unused)
3644 {
3645 #ifndef CONFIG_KAS_INFO
3646 	(void)p;
3647 	(void)uap;
3648 	return ENOTSUP;
3649 #else /* CONFIG_KAS_INFO */
3650 	int                     selector = uap->selector;
3651 	user_addr_t     valuep = uap->value;
3652 	user_addr_t     sizep = uap->size;
3653 	user_size_t size, rsize;
3654 	int                     error;
3655 
3656 	if (!kauth_cred_issuser(kauth_cred_get())) {
3657 		return EPERM;
3658 	}
3659 
3660 #if CONFIG_MACF
3661 	error = mac_system_check_kas_info(kauth_cred_get(), selector);
3662 	if (error) {
3663 		return error;
3664 	}
3665 #endif
3666 
3667 	if (IS_64BIT_PROCESS(p)) {
3668 		user64_size_t size64;
3669 		error = copyin(sizep, &size64, sizeof(size64));
3670 		size = (user_size_t)size64;
3671 	} else {
3672 		user32_size_t size32;
3673 		error = copyin(sizep, &size32, sizeof(size32));
3674 		size = (user_size_t)size32;
3675 	}
3676 	if (error) {
3677 		return error;
3678 	}
3679 
3680 	switch (selector) {
3681 	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3682 	{
3683 		uint64_t slide = vm_kernel_slide;
3684 
3685 		if (sizeof(slide) != size) {
3686 			return EINVAL;
3687 		}
3688 
3689 		error = copyout(&slide, valuep, sizeof(slide));
3690 		if (error) {
3691 			return error;
3692 		}
3693 		rsize = size;
3694 	}
3695 	break;
3696 	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3697 	{
3698 		uint32_t i;
3699 		kernel_mach_header_t *mh = &_mh_execute_header;
3700 		struct load_command *cmd;
3701 		cmd = (struct load_command*) &mh[1];
3702 		uint64_t *bases;
3703 		rsize = mh->ncmds * sizeof(uint64_t);
3704 
3705 		/*
3706 		 * Return the size if no data was passed
3707 		 */
3708 		if (valuep == 0) {
3709 			break;
3710 		}
3711 
3712 		if (rsize > size) {
3713 			return EINVAL;
3714 		}
3715 
3716 		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3717 
3718 		for (i = 0; i < mh->ncmds; i++) {
3719 			if (cmd->cmd == LC_SEGMENT_KERNEL) {
3720 				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3721 				bases[i] = (uint64_t)sg->vmaddr;
3722 			}
3723 			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3724 		}
3725 
3726 		error = copyout(bases, valuep, rsize);
3727 
3728 		kfree_data(bases, rsize);
3729 
3730 		if (error) {
3731 			return error;
3732 		}
3733 	}
3734 	break;
3735 	default:
3736 		return EINVAL;
3737 	}
3738 
3739 	if (IS_64BIT_PROCESS(p)) {
3740 		user64_size_t size64 = (user64_size_t)rsize;
3741 		error = copyout(&size64, sizep, sizeof(size64));
3742 	} else {
3743 		user32_size_t size32 = (user32_size_t)rsize;
3744 		error = copyout(&size32, sizep, sizeof(size32));
3745 	}
3746 
3747 	return error;
3748 #endif /* CONFIG_KAS_INFO */
3749 }
3750 
3751 #if __has_feature(ptrauth_calls)
3752 /*
3753  * Generate a random pointer signing key that isn't 0.
3754  */
3755 uint64_t
generate_jop_key(void)3756 generate_jop_key(void)
3757 {
3758 	uint64_t key;
3759 
3760 	do {
3761 		read_random(&key, sizeof key);
3762 	} while (key == 0);
3763 	return key;
3764 }
3765 #endif /* __has_feature(ptrauth_calls) */
3766 
3767 
3768 #pragma clang diagnostic push
3769 #pragma clang diagnostic ignored "-Wcast-qual"
3770 #pragma clang diagnostic ignored "-Wunused-function"
3771 
/*
 * Compile-time only: verifies that vm_min_kernel_address and
 * vm_max_kernel_address are unsigned-long sized, so the SYSCTL_ULONG
 * entries just below can legally alias them through the casts.
 * Never called at runtime (-Wunused-function is suppressed around it).
 */
static void
asserts()
{
	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}
3778 
/* Kernel VA bounds, exposed read-only (casts checked by asserts() above). */
SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

/* Global page counters (read-only). */
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");

#if __arm64__ && (DEVELOPMENT || DEBUG)
/* Gate for the footprint_suspend sysctl handler defined below. */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3800 static int
3801 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3802 {
3803 #pragma unused(oidp, arg1, arg2)
3804 	int error = 0;
3805 	int new_value;
3806 
3807 	if (req->newptr == USER_ADDR_NULL) {
3808 		return 0;
3809 	}
3810 	error = SYSCTL_IN(req, &new_value, sizeof(int));
3811 	if (error) {
3812 		return error;
3813 	}
3814 	if (!vm_footprint_suspend_allowed) {
3815 		if (new_value != 0) {
3816 			/* suspends are not allowed... */
3817 			return 0;
3818 		}
3819 		/* ... but let resumes proceed */
3820 	}
3821 	DTRACE_VM2(footprint_suspend,
3822 	    vm_map_t, current_map(),
3823 	    int, new_value);
3824 
3825 	pmap_footprint_suspend(current_map(), new_value);
3826 
3827 	return 0;
3828 }
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */

/* Corpse footprint collection statistics (read-only). */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");

#if CODE_SIGNING_MONITOR
/* Counters: code-signing checks deferred (or not) to the CS monitor. */
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */

/* Shared-region pager activity counters (read-only). */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
/* Writable tunable for delaying shared-region destruction
 * (units not visible here — defined with the shared region code). */
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3874 
#if MACH_ASSERT
/* Leeway (writable) before pmap ledger mismatches trigger a panic. */
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */


/* vm_map_lookup_and_lock_object() copy-path statistics (read-only):
 * counts/sizes/maxima for the slow, strategic and shadow copy paths. */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3920 
/* Policy knob: protect privileged processes from untrusted mappings. */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Shared region instance counters (read-only). */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* Pointer-auth specific shared region stats. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* debug4k (4K-page debugging) knobs, MACH_ASSERT builds only. */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3964 
/* Map size/data limit enforcement counters (read-only). */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/* Resilient-media fault path statistics (read-only). */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/* Error-injection knobs for the resilient-media path (rates writable,
 * hit counters read-only); MACH_ASSERT builds only. */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
4003 
4004 /*
4005  * A sysctl which causes all existing shared regions to become stale. They
4006  * will no longer be used by anything new and will be torn down as soon as
4007  * the last existing user exits. A write of non-zero value causes that to happen.
4008  * This should only be used by launchd, so we check that this is initproc.
4009  */
4010 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4011 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4012 {
4013 	unsigned int value = 0;
4014 	int changed = 0;
4015 	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
4016 	if (error || !changed) {
4017 		return error;
4018 	}
4019 	if (current_proc() != initproc) {
4020 		return EPERM;
4021 	}
4022 
4023 	vm_shared_region_pivot();
4024 
4025 	return 0;
4026 }
4027 
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* Object shadow-chain decision counters (read-only). */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");


#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self healing.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
4049 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4050 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4051 {
4052 	uint64_t value = 0;
4053 	int error = sysctl_handle_quad(oidp, &value, 0, req);
4054 	if (error || !req->newptr) {
4055 		return error;
4056 	}
4057 
4058 	if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
4059 		return 0;
4060 	} else {
4061 		return EINVAL;
4062 	}
4063 }
4064 
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */

#if CONFIG_MAP_RANGES
/*
 * vm.malloc_ranges
 *
 * space-separated list of <left:right> hexadecimal addresses.
 */
4076 static int
4077 vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
4078 {
4079 	vm_map_t map = current_map();
4080 	struct mach_vm_range r1, r2;
4081 	char str[20 * 4];
4082 	int len;
4083 
4084 	if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
4085 		return ENOENT;
4086 	}
4087 	if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
4088 		return ENOENT;
4089 	}
4090 
4091 	len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
4092 	    r1.max_address, r2.min_address,
4093 	    r2.max_address, get_map_max(map));
4094 
4095 	return SYSCTL_OUT(req, str, len);
4096 }
4097 
SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &vm_map_malloc_ranges, "A", "");

#if DEBUG || DEVELOPMENT
4103 static int
4104 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4105 {
4106 #pragma unused(arg1, arg2, oidp)
4107 	struct mach_vm_range range;
4108 
4109 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4110 	    != KERN_SUCCESS) {
4111 		return EINVAL;
4112 	}
4113 
4114 	return SYSCTL_OUT(req, &range, sizeof(range));
4115 }
4116 
4117 static int
4118 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4119 {
4120 #pragma unused(arg1, arg2, oidp)
4121 	struct mach_vm_range range;
4122 
4123 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4124 	    != KERN_SUCCESS) {
4125 		return EINVAL;
4126 	}
4127 
4128 	return SYSCTL_OUT(req, &range, sizeof(range));
4129 }
4130 
/*
 * A sysctl that can be used to return ranges for the current VM map.
 * Used for testing VM ranges.
 */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");

#endif /* DEBUG || DEVELOPMENT */
#endif /* CONFIG_MAP_RANGES */

/* Intentionally empty conditional block (placeholder). */
#if DEBUG || DEVELOPMENT
#endif /* DEBUG || DEVELOPMENT */
4145 
/* Map-range overflow detection: counter plus a logging switch.
 * NOTE(review): "oveflows" below is a long-standing typo in the sysctl
 * name; it is user-visible ABI, so it must not be corrected here. */
extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
SYSCTL_INT(_vm, OID_AUTO, map_range_oveflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");

/* Compressor segment-fill contention statistics (read-only). */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/* Compressor major-compaction reporting/yield tunables and stats. */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
/* Debug switch: panic when operating on a non-alive VM object. */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* Debug switch used by the fbdp test code to suppress panics. */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4183 
4184