xref: /xnu-10002.1.13/bsd/vm/vm_unix.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Mach Operating System
30  * Copyright (c) 1987 Carnegie-Mellon University
31  * All rights reserved.  The CMU software License Agreement specifies
32  * the terms and conditions for use and redistribution.
33  */
34 /*
35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36  * support for mandatory and extensible security protections.  This notice
37  * is included in support of clause 2.2 (b) of the Apple Public License,
38  * Version 2.0.
39  */
40 #include <vm/vm_options.h>
41 
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57 
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #include <sys/code_signing.h>
86 #if NECP
87 #include <net/necp.h>
88 #endif /* NECP */
89 #if SKYWALK
90 #include <skywalk/os_channel.h>
91 #endif /* SKYWALK */
92 
93 #include <security/audit/audit.h>
94 #include <security/mac.h>
95 #include <bsm/audit_kevents.h>
96 
97 #include <kern/kalloc.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_kern.h>
100 #include <vm/vm_pageout.h>
101 
102 #include <mach/shared_region.h>
103 #include <vm/vm_shared_region.h>
104 
105 #include <vm/vm_dyld_pager.h>
106 
107 #include <vm/vm_protos.h>
108 
109 #include <sys/kern_memorystatus.h>
110 #include <sys/kern_memorystatus_freeze.h>
111 #include <sys/proc_internal.h>
112 
113 #include <mach-o/fixup-chains.h>
114 
115 #if CONFIG_MACF
116 #include <security/mac_framework.h>
117 #endif
118 
119 #include <kern/bits.h>
120 
121 #if CONFIG_CSR
122 #include <sys/csr.h>
123 #endif /* CONFIG_CSR */
124 #include <sys/trust_caches.h>
125 #include <libkern/amfi/amfi.h>
126 #include <IOKit/IOBSD.h>
127 
/* Debug knobs for the VM map layer; each is compiled in only when the
 * corresponding VM_MAP_DEBUG_* option is configured. */
#if VM_MAP_DEBUG_APPLE_PROTECT
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
135 
136 #if DEVELOPMENT || DEBUG
137 
138 static int
139 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
140 {
141 #pragma unused(arg1, arg2)
142 	vm_offset_t     kaddr;
143 	kern_return_t   kr;
144 	int     error = 0;
145 	int     size = 0;
146 
147 	error = sysctl_handle_int(oidp, &size, 0, req);
148 	if (error || !req->newptr) {
149 		return error;
150 	}
151 
152 	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
153 	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
154 
155 	if (kr == KERN_SUCCESS) {
156 		kmem_free(kernel_map, kaddr, size);
157 	}
158 
159 	return error;
160 }
161 
162 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
163     0, 0, &sysctl_kmem_alloc_contig, "I", "");
164 
/* Global default for footprint-style vm_region reporting; per-task override
 * is available via vm.self_region_footprint below. */
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
167 
168 static int
169 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
170 {
171 #pragma unused(arg1, arg2, oidp)
172 	kmem_gobj_stats stats = kmem_get_gobj_stats();
173 
174 	return SYSCTL_OUT(req, &stats, sizeof(stats));
175 }
176 
177 SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
178     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
179     0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
180 
181 #endif /* DEVELOPMENT || DEBUG */
182 
183 static int
184 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
185 {
186 #pragma unused(arg1, arg2, oidp)
187 	int     error = 0;
188 	int     value;
189 
190 	value = task_self_region_footprint();
191 	error = SYSCTL_OUT(req, &value, sizeof(int));
192 	if (error) {
193 		return error;
194 	}
195 
196 	if (!req->newptr) {
197 		return 0;
198 	}
199 
200 	error = SYSCTL_IN(req, &value, sizeof(int));
201 	if (error) {
202 		return error;
203 	}
204 	task_self_region_footprint_set(value);
205 	return 0;
206 }
207 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
208 
/*
 * vm.self_region_page_size (per-thread, read/write)
 *
 * Read: reports the page size (in bytes) used when this thread inspects
 * VM regions, derived from the thread's region page shift.
 * Write: accepts 0, 4096 or 16384 and installs the corresponding shift.
 * 0 appears to select a default/reset behavior via bit_first(0) —
 * NOTE(review): confirm against thread_self_region_page_shift_set().
 */
static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int     error = 0;
	int     value;

	/* Report the current per-thread region page size first. */
	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		/* Read-only access: done. */
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	/* Only 0 (reset), 4K and 16K are meaningful page sizes here. */
	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	/* Without mixed-page-size support, only the map's native size works. */
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
245 
246 
#if DEVELOPMENT || DEBUG
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");

extern int vm_log_xnu_user_debug;
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
#endif /* DEVELOPMENT || DEBUG */

/* Read-only counters of code-signing related VM events. */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

/* apple_protect pager instance counts (read-only) and its tunable cache limit. */
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
266 
#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
/* Free-list counters for translation-table pages. */
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#if DEVELOPMENT || DEBUG
/* ASID (address space ID) management statistics. */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */
318 
/* Read-only exports of the vm_counters event counters (object collapse,
 * copy-on-write, and UPL-creation bookkeeping). */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;    /* Disabled by default */
extern int proc_shadow_max(void);

/*
 * vm.vm_shadow_max (read-only)
 *
 * Reports the deepest VM object shadow chain found by proc_shadow_max(),
 * or 0 when the scan is disabled via vm.vm_shadow_max_enabled (the scan
 * is expensive, hence the opt-in gate).
 */
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int depth;

	depth = vm_shadow_max_enabled ? proc_shadow_max() : 0;

	return SYSCTL_OUT(req, &depth, sizeof(depth));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
349 
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/* Forward declaration; defined below.  Declared noinline so the blocking
 * upcall is identifiable by name in stackshots and spindumps. */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */
365 
/*
 * Human-readable names for protection combinations, indexed by
 * (prot & VM_PROT_ALL); the table order follows the read/write/execute
 * protection bits (see the lookup in log_stack_execution_failure below).
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
376 
377 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)378 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
379 {
380 	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
381 	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
382 }
383 
/*
 * shared_region_unnest_logging: level of logging of unnesting events
 * 0	- no logging
 * 1	- throttled logging of unexpected unnesting events (default)
 * 2	- unthrottled logging of unexpected unnesting events
 * 3+	- unthrottled logging of all unnesting events
 */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

/* Throttle parameters used by log_unnest_badness() at logging level 1:
 * at most `count_threshold` messages per `log_interval` seconds. */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
398 
399 
/* Allowed on-disk locations for the dyld shared cache, plus whether that
 * restriction is enforced (scdir_enforce).  Tables are NULL-terminated. */
#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Same idea for the DriverKit shared cache locations. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
437 
#ifndef SECURE_KERNEL
/*
 * Handler for vm.enforce_shared_cache_dir.  When CSR (SIP) is configured,
 * access is denied unless the "unrestricted filesystem" CSR bit is allowed.
 */
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	/* Standard int read/write against scdir_enforce (arg1). */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}

SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif
452 
/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

/*
 * Report an unnesting of a shared-region range [s, e) in map `m`,
 * subject to the shared_region_unnest_logging level (see above).
 * Always fires the DTrace probe and prints when a report is emitted.
 */
void
log_unnest_badness(
	vm_map_t        m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t       is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval  tv;

	/* Level 0: logging disabled. */
	if (shared_region_unnest_logging == 0) {
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	/* Level 1: throttle to `count_threshold` messages per `interval`
	 * seconds (racy bookkeeping, acceptable per the note above). */
	if (shared_region_unnest_logging <= 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			/* New interval: reset the window and the count. */
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
503 
504 uint64_t
vm_purge_filebacked_pagers(void)505 vm_purge_filebacked_pagers(void)
506 {
507 	uint64_t pages_purged;
508 
509 	pages_purged = 0;
510 	pages_purged += apple_protect_pager_purge_all();
511 	pages_purged += shared_region_pager_purge_all();
512 	pages_purged += dyld_pager_purge_all();
513 #if DEVELOPMENT || DEBUG
514 	printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
515 #endif /* DEVELOPMENT || DEBUG */
516 	return pages_purged;
517 }
518 
519 int
useracc(user_addr_t addr,user_size_t len,int prot)520 useracc(
521 	user_addr_t     addr,
522 	user_size_t     len,
523 	int     prot)
524 {
525 	vm_map_t        map;
526 
527 	map = current_map();
528 	return vm_map_check_protection(
529 		map,
530 		vm_map_trunc_page(addr,
531 		vm_map_page_mask(map)),
532 		vm_map_round_page(addr + len,
533 		vm_map_page_mask(map)),
534 		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
535 }
536 
537 int
vslock(user_addr_t addr,user_size_t len)538 vslock(
539 	user_addr_t     addr,
540 	user_size_t     len)
541 {
542 	kern_return_t   kret;
543 	vm_map_t        map;
544 
545 	map = current_map();
546 	kret = vm_map_wire_kernel(map,
547 	    vm_map_trunc_page(addr,
548 	    vm_map_page_mask(map)),
549 	    vm_map_round_page(addr + len,
550 	    vm_map_page_mask(map)),
551 	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
552 	    FALSE);
553 
554 	switch (kret) {
555 	case KERN_SUCCESS:
556 		return 0;
557 	case KERN_INVALID_ADDRESS:
558 	case KERN_NO_SPACE:
559 		return ENOMEM;
560 	case KERN_PROTECTION_FAILURE:
561 		return EACCES;
562 	default:
563 		return EINVAL;
564 	}
565 }
566 
/*
 * Unwire [addr, addr+len) of the current map (counterpart of vslock).
 * The `dirtied` hint is currently unused: the #if FIXME block that would
 * mark the pages modified is compiled out.  Returns 0 or a BSD errno
 * mapped from the Mach result.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* Map the Mach result onto the same errnos vslock uses. */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
617 
618 int
subyte(user_addr_t addr,int byte)619 subyte(
620 	user_addr_t addr,
621 	int byte)
622 {
623 	char character;
624 
625 	character = (char)byte;
626 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
627 }
628 
629 int
suibyte(user_addr_t addr,int byte)630 suibyte(
631 	user_addr_t addr,
632 	int byte)
633 {
634 	char character;
635 
636 	character = (char)byte;
637 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
638 }
639 
640 int
fubyte(user_addr_t addr)641 fubyte(user_addr_t addr)
642 {
643 	unsigned char byte;
644 
645 	if (copyin(addr, (void *) &byte, sizeof(char))) {
646 		return -1;
647 	}
648 	return byte;
649 }
650 
651 int
fuibyte(user_addr_t addr)652 fuibyte(user_addr_t addr)
653 {
654 	unsigned char byte;
655 
656 	if (copyin(addr, (void *) &(byte), sizeof(char))) {
657 		return -1;
658 	}
659 	return byte;
660 }
661 
662 int
suword(user_addr_t addr,long word)663 suword(
664 	user_addr_t addr,
665 	long word)
666 {
667 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
668 }
669 
670 long
fuword(user_addr_t addr)671 fuword(user_addr_t addr)
672 {
673 	long word = 0;
674 
675 	if (copyin(addr, (void *) &word, sizeof(int))) {
676 		return -1;
677 	}
678 	return word;
679 }
680 
681 /* suiword and fuiword are the same as suword and fuword, respectively */
682 
683 int
suiword(user_addr_t addr,long word)684 suiword(
685 	user_addr_t addr,
686 	long word)
687 {
688 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
689 }
690 
691 long
fuiword(user_addr_t addr)692 fuiword(user_addr_t addr)
693 {
694 	long word = 0;
695 
696 	if (copyin(addr, (void *) &word, sizeof(int))) {
697 		return -1;
698 	}
699 	return word;
700 }
701 
702 /*
703  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
704  * fetching and setting of process-sized size_t and pointer values.
705  */
706 int
sulong(user_addr_t addr,int64_t word)707 sulong(user_addr_t addr, int64_t word)
708 {
709 	if (IS_64BIT_PROCESS(current_proc())) {
710 		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
711 	} else {
712 		return suiword(addr, (long)word);
713 	}
714 }
715 
716 int64_t
fulong(user_addr_t addr)717 fulong(user_addr_t addr)
718 {
719 	int64_t longword;
720 
721 	if (IS_64BIT_PROCESS(current_proc())) {
722 		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
723 			return -1;
724 		}
725 		return longword;
726 	} else {
727 		return (int64_t)fuiword(addr);
728 	}
729 }
730 
731 int
suulong(user_addr_t addr,uint64_t uword)732 suulong(user_addr_t addr, uint64_t uword)
733 {
734 	if (IS_64BIT_PROCESS(current_proc())) {
735 		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
736 	} else {
737 		return suiword(addr, (uint32_t)uword);
738 	}
739 }
740 
741 uint64_t
fuulong(user_addr_t addr)742 fuulong(user_addr_t addr)
743 {
744 	uint64_t ulongword;
745 
746 	if (IS_64BIT_PROCESS(current_proc())) {
747 		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
748 			return -1ULL;
749 		}
750 		return ulongword;
751 	} else {
752 		return (uint64_t)fuiword(addr);
753 	}
754 }
755 
/* swapon(2) stub: swap is managed by the VM subsystem, not this legacy
 * syscall, so it is unconditionally unsupported. */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}
761 
/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *                      args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *              KERN_FAILURE	Not success
 *
 * Implicit returns: args->pid		Process ID (-1 on failure)
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t        t = args->t;
	user_addr_t             pid_addr  = args->pid;
	proc_t p;
	task_t          t1;
	int     pid = -1;
	kern_return_t   err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (task_is_a_corpse(t1)) {
			/* Corpses have no proc; report the task's saved pid. */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* Best-effort copyout; pid stays -1 on the failure paths. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
817 
/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
825 
/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	bool checkcredentials;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred)) {
		return TRUE;
	}

	/* We're allowed to get our own task port */
	if (target == current_proc()) {
		return TRUE;
	}

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Third-party debuggable drivers skip the uid/gid matching below;
	 * the P_SUGID check at the end still applies to them. */
	checkcredentials = !proc_is_third_party_debuggable_driver(target);

	if (checkcredentials) {
		/* Do target's ruid, euid, and saved uid match my euid? */
		if ((kauth_cred_getuid(targetcred) != myuid) ||
		    (kauth_cred_getruid(targetcred) != myuid) ||
		    (kauth_cred_getsvuid(targetcred) != myuid)) {
			allowed = FALSE;
			goto out;
		}
		/* Are target's groups a subset of my groups? */
		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
		    allowed == 0) {
			allowed = FALSE;
			goto out;
		}
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
908 
/*
 *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 *	Description:	Waits for the user space daemon to respond to the request
 *			we made. Function declared non inline to be visible in
 *			stackshots and spindumps as well as debugging.
 */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	/* Thin pass-through; the blocking happens inside the MIG call. */
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
922 
/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;      /* out: port name is copied here */
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;            /* caller's task, from target_tport */
	task_t                  task = TASK_NULL;          /* target task, referenced below */
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;  /* target's task access (taskgated) port */
	void                    * sright = NULL;           /* send right produced by conversion */
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};              /* stable identity, usable after proc_rele() */

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Validate the caller-supplied task port; the ref is dropped at tfpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture identity now: the proc ref is dropped before any upcall below. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* POSIX uid/policy check for task_for_pid eligibility. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* No task attached (yet/anymore): succeed with a null port name. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held across upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	/* MAC policy check (control flavor); failure is mapped to KERN_FAILURE. */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (task_is_a_corpse(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: drop all refs and publish the (possibly null) port name. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1083 
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;    /* out: port name is copied here */
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;         /* caller's task, from target_tport */
	mach_port_name_t        tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;                     /* refheld: target_cred ref taken */
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller-supplied task port; the ref is dropped on every exit path. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Target must not be a zombie, and the caller must be the target
		 * itself, the superuser, a matching-uid/ruid process, or hold the
		 * name-port entitlement.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
		    )) {
			if (proc_task(p) != TASK_NULL) {
				/* Capture identity; the proc ref is dropped before the MAC check. */
				struct proc_ident pident = proc_ident(p);

				task_t task = proc_task(p);

				task_reference(task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* Conversion consumes the task ref taken above. */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret  = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

	/* Failure path: no proc, permission denied, or MAC check refused. */
#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1182 
/*
 *	Routine:	task_inspect_for_pid
 *	Purpose:
 *		Get the task inspect port for another "process", named by its
 *		process ID on the same host as "target_task".
 *	Returns:
 *		0 on success (port name copied out to args->t); otherwise
 *		EPERM / EINVAL / ESRCH / EACCES / EINTR, also stored in *ret.
 */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;    /* out: port name is copied here */

	proc_t                  proc = PROC_NULL;
	task_t                  t1 = TASK_NULL;         /* caller's task, from target_tport */
	task_inspect_t          task_insp = TASK_INSPECT_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;   /* target's task access port */
	int                     error = 0;
	void                    *sright = NULL;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};           /* stable identity, usable after proc_rele() */

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Capture identity now: the proc ref is dropped before any upcall below. */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	/* No task attached: silently succeed with a null port name (error stays 0). */
	task_insp = proc_task(proc);
	if (task_insp == TASK_INSPECT_NULL) {
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	/* Common exit: drop all refs and publish the (possibly null) port name. */
	task_deallocate(t1);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1304 
1305 /*
1306  *	Routine:	task_read_for_pid
1307  *	Purpose:
1308  *		Get the task read port for another "process", named by its
1309  *		process ID on the same host as "target_task".
1310  */
1311 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1312 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1313 {
1314 	mach_port_name_t        target_tport = args->target_tport;
1315 	int                     pid = args->pid;
1316 	user_addr_t             task_addr = args->t;
1317 
1318 	proc_t                  proc = PROC_NULL;
1319 	task_t                  t1 = TASK_NULL;
1320 	task_read_t             task_read = TASK_READ_NULL;
1321 	mach_port_name_t        tret = MACH_PORT_NULL;
1322 	ipc_port_t              tfpport = MACH_PORT_NULL;
1323 	int                     error = 0;
1324 	void                    *sright = NULL;
1325 	boolean_t               is_current_proc = FALSE;
1326 	struct proc_ident       pident = {0};
1327 
1328 	/* Disallow read port for kernel_task */
1329 	if (pid == 0) {
1330 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1331 		return EPERM;
1332 	}
1333 
1334 	t1 = port_name_to_task(target_tport);
1335 	if (t1 == TASK_NULL) {
1336 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1337 		return EINVAL;
1338 	}
1339 
1340 	proc = proc_find(pid);
1341 	if (proc == PROC_NULL) {
1342 		error = ESRCH;
1343 		goto trfpout;
1344 	}
1345 	pident = proc_ident(proc);
1346 	is_current_proc = (proc == current_proc());
1347 
1348 	if (!(task_for_pid_posix_check(proc))) {
1349 		error = EPERM;
1350 		goto trfpout;
1351 	}
1352 
1353 	task_read = proc_task(proc);
1354 	if (task_read == TASK_INSPECT_NULL) {
1355 		goto trfpout;
1356 	}
1357 
1358 	/*
1359 	 * Grab a task reference and drop the proc reference before making any upcalls.
1360 	 */
1361 	task_reference(task_read);
1362 
1363 	proc_rele(proc);
1364 	proc = PROC_NULL;
1365 
1366 #if CONFIG_MACF
1367 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1368 	if (error) {
1369 		error = EPERM;
1370 		goto trfpout;
1371 	}
1372 #endif
1373 
1374 	/* If we aren't root and target's task access port is set... */
1375 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1376 	    !is_current_proc &&
1377 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
1378 	    (tfpport != IPC_PORT_NULL)) {
1379 		if (tfpport == IPC_PORT_DEAD) {
1380 			error = EACCES;
1381 			goto trfpout;
1382 		}
1383 
1384 
1385 		/* Call up to the task access server */
1386 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1387 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1388 
1389 		if (error != MACH_MSG_SUCCESS) {
1390 			if (error == MACH_RCV_INTERRUPTED) {
1391 				error = EINTR;
1392 			} else {
1393 				error = EPERM;
1394 			}
1395 			goto trfpout;
1396 		}
1397 	}
1398 
1399 	/* Check if the task has been corpsified */
1400 	if (task_is_a_corpse(task_read)) {
1401 		error = EACCES;
1402 		goto trfpout;
1403 	}
1404 
1405 	/* could be IP_NULL, consumes a ref */
1406 	sright = (void*) convert_task_read_to_port(task_read);
1407 	task_read = TASK_READ_NULL;
1408 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1409 
1410 trfpout:
1411 	task_deallocate(t1);
1412 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1413 	if (proc != PROC_NULL) {
1414 		proc_rele(proc);
1415 	}
1416 	if (tfpport != IPC_PORT_NULL) {
1417 		ipc_port_release_send(tfpport);
1418 	}
1419 	if (task_read != TASK_READ_NULL) {
1420 		task_deallocate(task_read);
1421 	}
1422 
1423 	*ret = error;
1424 	return error;
1425 }
1426 
/*
 * pid_suspend: suspend the task named by args->pid via task_pidsuspend(),
 * after posix/entitlement, MACF, and (on macOS) taskgated access checks.
 * Returns 0 on success; EPERM / ESRCH / EACCES / EINTR / EINVAL otherwise
 * (also stored in *ret).
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;   /* target's task access port */

	/* Never allow suspending the kernel (pid 0). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* posix check may be bypassed by the suspend/resume entitlement. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	/*
	 * NOTE(review): target is referenced unconditionally here; only the
	 * XNU_TARGET_OS_OSX block above checks it against TASK_NULL — confirm
	 * proc_task() cannot return TASK_NULL for a found proc on other targets.
	 */
	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Tell memorystatus (jetsam) the process is now suspended. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1518 
/*
 * debug_control_port_for_pid: return the debug control port for the task
 * named by args->pid, copied out to args->t.  Callers holding
 * DEBUG_PORT_ENTITLEMENT skip both the MACF check and the taskgated upcall;
 * all callers still pass the posix check and the pid != 0 / corpse checks.
 */
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;    /* out: port name is copied here */
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;         /* caller's task, from target_tport */
	task_t                  task = TASK_NULL;       /* target task, referenced below */
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;   /* target's task access port */
	ipc_port_t              sright = NULL;          /* debug port send right */
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};           /* stable identity, usable after proc_rele() */

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Validate the caller-supplied task port; the ref is dropped at tfpout. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture identity now: the proc ref is dropped before any upcall below. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* No task attached (yet/anymore): succeed with a null port name. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	/* Entitled callers bypass both the MACF check and the taskgated upcall. */
	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: drop all refs and publish the (possibly null) port name. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1653 
/*
 * pid_resume: resume a task previously suspended via pid_suspend, after the
 * same posix/entitlement, MACF and (on macOS) taskgated checks.  On non-macOS
 * targets, also resumes the process's sockets.  Returns 0 on success;
 * EPERM / ESRCH / EACCES / EINTR / EINVAL / EIO otherwise (also in *ret).
 */
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t  target = NULL;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;
	int     error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;   /* target's task access port */

	/* Never allow resuming the kernel (pid 0). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* posix check may be bypassed by the suspend/resume entitlement. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* Un-defunct the process's sockets before resuming execution. */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	/* Tell memorystatus (jetsam) the process is active again. */
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				/* Resume failed for lack of memory: kill the target. */
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1757 
#if !XNU_TARGET_OS_OSX
/*
 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
 * When a process is specified, this call is blocking, otherwise we wake up the
 * freezer thread and do not block on a process being frozen.
 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int     error = 0;
	proc_t  targetproc = PROC_NULL;
	int     pid = args->pid;

#ifndef CONFIG_FREEZE
	/* Without CONFIG_FREEZE the whole body compiles out; return 0. */
	#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	//Note that targetproc may be null
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * Sentinel pids: -2 compresses anonymous pages globally, -1 kicks the
	 * freezer thread (non-blocking); a real pid freezes synchronously.
	 */
	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
#endif /* !XNU_TARGET_OS_OSX */
1821 
#if SOCKETS
/*
 * networking_memstatus_callout: forward a memorystatus event (status) to the
 * networking-related file descriptors of proc p — NECP fds and Skywalk
 * channels — under the proc fd lock.  Always returns 1.
 */
int
networking_memstatus_callout(proc_t p, uint32_t status)
{
	struct fileproc *fp;

	/*
	 * proc list lock NOT held
	 * proc lock NOT held
	 * a reference on the proc has been held / shall be dropped by the caller.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);

	proc_fdlock(p);

	/* Walk every open file; only NECP and channel fds care about memstatus. */
	fdt_foreach(fp, p) {
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
#if NECP
		case DTYPE_NETPOLICY:
			necp_fd_memstatus(p, status,
			    (struct necp_fd_data *)fp_get_data(fp));
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			kern_channel_memstatus(p, status,
			    (struct kern_channel *)fp_get_data(fp));
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}
	proc_fdunlock(p);

	return 1;
}
1860 
1861 #if SKYWALK
1862 /*
1863  * Since we make multiple passes across the fileproc array, record the
1864  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases.  If we find more,
1866  * we'll go to the slow path during second pass.
1867  */
1868 #define MAX_CHANNELS    8       /* should be more than enough */
1869 #endif /* SKYWALK */
1870 
/*
 * networking_defunct_callout: proc_iterate callback for pid_shutdown_sockets.
 * Defuncts sockets matching the target pid (owner, last_pid, or delegated
 * e_pid), then NECP fds, then Skywalk channels.  Channels are collected in a
 * first pass and defuncted in a second pass so NECP teardown happens first.
 * Always returns PROC_RETURNED.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	/* Fast-path cache of up to MAX_CHANNELS channel pointers (first pass). */
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match owner pid, last-user pid, or delegated effective pid. */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP).  Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* Cache overflowed: re-walk the fd table for all channels. */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1955 
/*
 * pid_shutdown_sockets: defunct the networking state associated with
 * args->pid at the requested level (service-only or all), after posix/
 * entitlement and MACF checks.  Iterates every process because sockets
 * belonging to the target pid may be held open in other processes.
 * Returns 0 on success; EINVAL / ESRCH / EPERM otherwise (also in *ret).
 */
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int                             error = 0;
	proc_t                          targetproc = PROC_NULL;
	int                             pid = args->pid;
	int                             level = args->level;

	/* Only the two defined shutdown levels are accepted. */
	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* posix check may be bypassed by the suspend/resume entitlement. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/* Walk all procs; the callback matches sockets/fds against args->pid. */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
	    networking_defunct_callout, args, NULL, NULL);

out:
	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
2000 
2001 #endif /* SOCKETS */
2002 
2003 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)2004 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
2005     __unused int arg2, struct sysctl_req *req)
2006 {
2007 	int error = 0;
2008 	int new_value;
2009 
2010 	error = SYSCTL_OUT(req, arg1, sizeof(int));
2011 	if (error || req->newptr == USER_ADDR_NULL) {
2012 		return error;
2013 	}
2014 
2015 	if (!kauth_cred_issuser(kauth_cred_get())) {
2016 		return EPERM;
2017 	}
2018 
2019 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2020 		goto out;
2021 	}
2022 	if ((new_value == KERN_TFP_POLICY_DENY)
2023 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2024 		tfp_policy = new_value;
2025 	} else {
2026 		error = EINVAL;
2027 	}
2028 out:
2029 	return error;
2030 }
2031 
/* kern.secure_kernel: read-only; reports whether this is a SECURE_KERNEL build. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy knob, validated by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* Shared-region tunables and counters exposed under the vm sysctl namespace. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2050 
2051 /*
2052  * shared_region_check_np:
2053  *
2054  * This system call is intended for dyld.
2055  *
2056  * dyld calls this when any process starts to see if the process's shared
2057  * region is already set up and ready to use.
2058  * This call returns the base address of the first mapping in the
2059  * process's shared region's first mapping.
2060  * dyld will then check what's mapped at that address.
2061  *
2062  * If the shared region is empty, dyld will then attempt to map the shared
2063  * cache file in the shared region via the shared_region_map_np() system call.
2064  *
2065  * If something's already mapped in the shared region, dyld will check if it
2066  * matches the shared cache it would like to use for that process.
2067  * If it matches, evrything's ready and the process can proceed and use the
2068  * shared region.
2069  * If it doesn't match, dyld will unmap the shared region and map the shared
2070  * cache into the process's address space via mmap().
2071  *
2072  * A NULL pointer argument can be used by dyld to indicate it has unmapped
2073  * the shared region. We will remove the shared_region reference from the task.
2074  *
2075  * ERROR VALUES
2076  * EINVAL	no shared region
2077  * ENOMEM	shared region is empty
2078  * EFAULT	bad address for "start_address"
2079  */
int
shared_region_check_np(
	__unused struct proc                    *p,
	struct shared_region_check_np_args      *uap,
	__unused int                            *retvalp)
{
	vm_shared_region_t      shared_region;
	mach_vm_offset_t        start_address = 0;
	int                     error = 0;
	kern_return_t           kr;
	task_t                  task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process.
	 * The sentinel is compared in the task's pointer width: truncated to
	 * 32 bits for a 32-bit address space task.
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/* retrieve the current tasks's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				/* empty/uninitialized shared region */
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					/* uap->start_address is an out pointer here */
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						/* bad user address: EFAULT */
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		/* drop the reference taken by vm_shared_region_get() */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2178 
2179 
2180 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2181 shared_region_copyin(
2182 	struct proc  *p,
2183 	user_addr_t  user_addr,
2184 	unsigned int count,
2185 	unsigned int element_size,
2186 	void         *kernel_data)
2187 {
2188 	int             error = 0;
2189 	vm_size_t       size = count * element_size;
2190 
2191 	error = copyin(user_addr, kernel_data, size);
2192 	if (error) {
2193 		SHARED_REGION_TRACE_ERROR(
2194 			("shared_region: %p [%d(%s)] map(): "
2195 			"copyin(0x%llx, %ld) failed (error=%d)\n",
2196 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2197 			proc_getpid(p), p->p_comm,
2198 			(uint64_t)user_addr, (long)size, error));
2199 	}
2200 	return error;
2201 }
2202 
/*
 * A reasonable upper limit to prevent overflow of allocation/copyin.
 * Bounds files_count in shared_region_map_and_slide_setup() and the
 * kalloc_data() sizing in shared_region_map_and_slide_2_np().
 */
#define _SR_FILE_MAPPINGS_MAX_FILES 256

/* forward declaration */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
	struct proc              *p,
	uint32_t                 files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region  *shared_region);
2215 
2216 /*
2217  * Setup part of _shared_region_map_and_slide().
2218  * It had to be broken out of _shared_region_map_and_slide() to
2219  * prevent compiler inlining from blowing out the stack.
2220  */
2221 __attribute__((noinline))
2222 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode * rdir_vp)2223 shared_region_map_and_slide_setup(
2224 	struct proc                         *p,
2225 	uint32_t                            files_count,
2226 	struct shared_file_np               *files,
2227 	uint32_t                            mappings_count,
2228 	struct shared_file_mapping_slide_np *mappings,
2229 	struct _sr_file_mappings            **sr_file_mappings,
2230 	struct vm_shared_region             **shared_region_ptr,
2231 	struct vnode                        *rdir_vp)
2232 {
2233 	int                             error = 0;
2234 	struct _sr_file_mappings        *srfmp;
2235 	uint32_t                        mappings_next;
2236 	struct vnode_attr               va;
2237 	off_t                           fs;
2238 #if CONFIG_MACF
2239 	vm_prot_t                       maxprot = VM_PROT_ALL;
2240 #endif
2241 	uint32_t                        i;
2242 	struct vm_shared_region         *shared_region = NULL;
2243 	boolean_t                       is_driverkit = task_is_driver(current_task());
2244 
2245 	SHARED_REGION_TRACE_DEBUG(
2246 		("shared_region: %p [%d(%s)] -> map\n",
2247 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2248 		proc_getpid(p), p->p_comm));
2249 
2250 	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2251 		error = E2BIG;
2252 		goto done;
2253 	}
2254 	if (files_count == 0) {
2255 		error = EINVAL;
2256 		goto done;
2257 	}
2258 	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2259 	    Z_WAITOK | Z_ZERO);
2260 	if (*sr_file_mappings == NULL) {
2261 		error = ENOMEM;
2262 		goto done;
2263 	}
2264 	mappings_next = 0;
2265 	for (i = 0; i < files_count; i++) {
2266 		srfmp = &(*sr_file_mappings)[i];
2267 		srfmp->fd = files[i].sf_fd;
2268 		srfmp->mappings_count = files[i].sf_mappings_count;
2269 		srfmp->mappings = &mappings[mappings_next];
2270 		mappings_next += srfmp->mappings_count;
2271 		if (mappings_next > mappings_count) {
2272 			error = EINVAL;
2273 			goto done;
2274 		}
2275 		srfmp->slide = files[i].sf_slide;
2276 	}
2277 
2278 	/* get the process's shared region (setup in vm_map_exec()) */
2279 	shared_region = vm_shared_region_trim_and_get(current_task());
2280 	*shared_region_ptr = shared_region;
2281 	if (shared_region == NULL) {
2282 		SHARED_REGION_TRACE_ERROR(
2283 			("shared_region: %p [%d(%s)] map(): "
2284 			"no shared region\n",
2285 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2286 			proc_getpid(p), p->p_comm));
2287 		error = EINVAL;
2288 		goto done;
2289 	}
2290 
2291 	/*
2292 	 * Check the shared region matches the current root
2293 	 * directory of this process.  Deny the mapping to
2294 	 * avoid tainting the shared region with something that
2295 	 * doesn't quite belong into it.
2296 	 */
2297 	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2298 	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2299 		SHARED_REGION_TRACE_ERROR(
2300 			("shared_region: map(%p) root_dir mismatch\n",
2301 			(void *)VM_KERNEL_ADDRPERM(current_thread())));
2302 		error = EPERM;
2303 		goto done;
2304 	}
2305 
2306 
2307 	for (srfmp = &(*sr_file_mappings)[0];
2308 	    srfmp < &(*sr_file_mappings)[files_count];
2309 	    srfmp++) {
2310 		if (srfmp->mappings_count == 0) {
2311 			/* no mappings here... */
2312 			continue;
2313 		}
2314 
2315 		/*
2316 		 * A file descriptor of -1 is used to indicate that the data
2317 		 * to be put in the shared region for this mapping comes directly
2318 		 * from the processes address space. Ensure we have proper alignments.
2319 		 */
2320 		if (srfmp->fd == -1) {
2321 			/* only allow one mapping per fd */
2322 			if (srfmp->mappings_count > 1) {
2323 				SHARED_REGION_TRACE_ERROR(
2324 					("shared_region: %p [%d(%s)] map data >1 mapping\n",
2325 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2326 					proc_getpid(p), p->p_comm));
2327 				error = EINVAL;
2328 				goto done;
2329 			}
2330 
2331 			/*
2332 			 * The destination address and size must be page aligned.
2333 			 */
2334 			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2335 			mach_vm_address_t dest_addr = mapping->sms_address;
2336 			mach_vm_size_t    map_size = mapping->sms_size;
2337 			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2338 				SHARED_REGION_TRACE_ERROR(
2339 					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2340 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2341 					proc_getpid(p), p->p_comm, dest_addr));
2342 				error = EINVAL;
2343 				goto done;
2344 			}
2345 			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2346 				SHARED_REGION_TRACE_ERROR(
2347 					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2348 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2349 					proc_getpid(p), p->p_comm, map_size));
2350 				error = EINVAL;
2351 				goto done;
2352 			}
2353 			continue;
2354 		}
2355 
2356 		/* get file structure from file descriptor */
2357 		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2358 		if (error) {
2359 			SHARED_REGION_TRACE_ERROR(
2360 				("shared_region: %p [%d(%s)] map: "
2361 				"fd=%d lookup failed (error=%d)\n",
2362 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2363 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2364 			goto done;
2365 		}
2366 
2367 		/* we need at least read permission on the file */
2368 		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2369 			SHARED_REGION_TRACE_ERROR(
2370 				("shared_region: %p [%d(%s)] map: "
2371 				"fd=%d not readable\n",
2372 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2373 				proc_getpid(p), p->p_comm, srfmp->fd));
2374 			error = EPERM;
2375 			goto done;
2376 		}
2377 
2378 		/* get vnode from file structure */
2379 		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2380 		if (error) {
2381 			SHARED_REGION_TRACE_ERROR(
2382 				("shared_region: %p [%d(%s)] map: "
2383 				"fd=%d getwithref failed (error=%d)\n",
2384 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2385 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2386 			goto done;
2387 		}
2388 		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2389 
2390 		/* make sure the vnode is a regular file */
2391 		if (srfmp->vp->v_type != VREG) {
2392 			SHARED_REGION_TRACE_ERROR(
2393 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2394 				"not a file (type=%d)\n",
2395 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2396 				proc_getpid(p), p->p_comm,
2397 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2398 				srfmp->vp->v_name, srfmp->vp->v_type));
2399 			error = EINVAL;
2400 			goto done;
2401 		}
2402 
2403 #if CONFIG_MACF
2404 		/* pass in 0 for the offset argument because AMFI does not need the offset
2405 		 *       of the shared cache */
2406 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2407 		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2408 		if (error) {
2409 			goto done;
2410 		}
2411 #endif /* MAC */
2412 
2413 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2414 		/*
2415 		 * Check if the shared cache is in the trust cache;
2416 		 * if so, we can skip the root ownership check.
2417 		 */
2418 #if DEVELOPMENT || DEBUG
2419 		/*
2420 		 * Skip both root ownership and trust cache check if
2421 		 * enforcement is disabled.
2422 		 */
2423 		if (!cs_system_enforcement()) {
2424 			goto after_root_check;
2425 		}
2426 #endif /* DEVELOPMENT || DEBUG */
2427 		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2428 		if (blob == NULL) {
2429 			SHARED_REGION_TRACE_ERROR(
2430 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2431 				"missing CS blob\n",
2432 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2433 				proc_getpid(p), p->p_comm,
2434 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2435 				srfmp->vp->v_name));
2436 			goto root_check;
2437 		}
2438 		const uint8_t *cdhash = csblob_get_cdhash(blob);
2439 		if (cdhash == NULL) {
2440 			SHARED_REGION_TRACE_ERROR(
2441 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2442 				"missing cdhash\n",
2443 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2444 				proc_getpid(p), p->p_comm,
2445 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2446 				srfmp->vp->v_name));
2447 			goto root_check;
2448 		}
2449 
2450 		bool in_trust_cache = false;
2451 		TrustCacheQueryToken_t qt;
2452 		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2453 			TCType_t tc_type = kTCTypeInvalid;
2454 			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2455 			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2456 			    (tc_type == kTCTypeCryptex1BootOS ||
2457 			    tc_type == kTCTypeStatic ||
2458 			    tc_type == kTCTypeEngineering));
2459 		}
2460 		if (!in_trust_cache) {
2461 			SHARED_REGION_TRACE_ERROR(
2462 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2463 				"not in trust cache\n",
2464 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2465 				proc_getpid(p), p->p_comm,
2466 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2467 				srfmp->vp->v_name));
2468 			goto root_check;
2469 		}
2470 		goto after_root_check;
2471 root_check:
2472 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2473 
2474 		/* The shared cache file must be owned by root */
2475 		VATTR_INIT(&va);
2476 		VATTR_WANTED(&va, va_uid);
2477 		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2478 		if (error) {
2479 			SHARED_REGION_TRACE_ERROR(
2480 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2481 				"vnode_getattr(%p) failed (error=%d)\n",
2482 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2483 				proc_getpid(p), p->p_comm,
2484 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2485 				srfmp->vp->v_name,
2486 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2487 				error));
2488 			goto done;
2489 		}
2490 		if (va.va_uid != 0) {
2491 			SHARED_REGION_TRACE_ERROR(
2492 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2493 				"owned by uid=%d instead of 0\n",
2494 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2495 				proc_getpid(p), p->p_comm,
2496 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2497 				srfmp->vp->v_name, va.va_uid));
2498 			error = EPERM;
2499 			goto done;
2500 		}
2501 
2502 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2503 after_root_check:
2504 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2505 
2506 #if CONFIG_CSR
2507 		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2508 			VATTR_INIT(&va);
2509 			VATTR_WANTED(&va, va_flags);
2510 			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2511 			if (error) {
2512 				SHARED_REGION_TRACE_ERROR(
2513 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2514 					"vnode_getattr(%p) failed (error=%d)\n",
2515 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2516 					proc_getpid(p), p->p_comm,
2517 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2518 					srfmp->vp->v_name,
2519 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2520 					error));
2521 				goto done;
2522 			}
2523 
2524 			if (!(va.va_flags & SF_RESTRICTED)) {
2525 				/*
2526 				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2527 				 * the shared cache file is NOT SIP-protected, so reject the
2528 				 * mapping request
2529 				 */
2530 				SHARED_REGION_TRACE_ERROR(
2531 					("shared_region: %p [%d(%s)] map(%p:'%s'), "
2532 					"vnode is not SIP-protected. \n",
2533 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2534 					proc_getpid(p), p->p_comm,
2535 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2536 					srfmp->vp->v_name));
2537 				error = EPERM;
2538 				goto done;
2539 			}
2540 		}
2541 #else /* CONFIG_CSR */
2542 
2543 		/*
2544 		 * Devices without SIP/ROSP need to make sure that the shared cache
2545 		 * is either on the root volume or in the preboot cryptex volume.
2546 		 */
2547 		assert(rdir_vp != NULL);
2548 		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2549 			vnode_t preboot_vp = NULL;
2550 #if XNU_TARGET_OS_OSX
2551 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2552 #else
2553 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2554 #endif
2555 			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2556 			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2557 				SHARED_REGION_TRACE_ERROR(
2558 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2559 					"not on process' root volume nor preboot volume\n",
2560 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2561 					proc_getpid(p), p->p_comm,
2562 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2563 					srfmp->vp->v_name));
2564 				error = EPERM;
2565 				if (preboot_vp) {
2566 					(void)vnode_put(preboot_vp);
2567 				}
2568 				goto done;
2569 			} else if (preboot_vp) {
2570 				(void)vnode_put(preboot_vp);
2571 			}
2572 		}
2573 #endif /* CONFIG_CSR */
2574 
2575 		if (scdir_enforce) {
2576 			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2577 			struct vnode *scdir_vp = NULL;
2578 			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2579 			    *expected_scdir_path != NULL;
2580 			    expected_scdir_path++) {
2581 				/* get vnode for expected_scdir_path */
2582 				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2583 				if (error) {
2584 					SHARED_REGION_TRACE_ERROR(
2585 						("shared_region: %p [%d(%s)]: "
2586 						"vnode_lookup(%s) failed (error=%d)\n",
2587 						(void *)VM_KERNEL_ADDRPERM(current_thread()),
2588 						proc_getpid(p), p->p_comm,
2589 						*expected_scdir_path, error));
2590 					continue;
2591 				}
2592 
2593 				/* check if parent is scdir_vp */
2594 				assert(scdir_vp != NULL);
2595 				if (vnode_parent(srfmp->vp) == scdir_vp) {
2596 					(void)vnode_put(scdir_vp);
2597 					scdir_vp = NULL;
2598 					goto scdir_ok;
2599 				}
2600 				(void)vnode_put(scdir_vp);
2601 				scdir_vp = NULL;
2602 			}
2603 			/* nothing matches */
2604 			SHARED_REGION_TRACE_ERROR(
2605 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2606 				"shared cache file not in expected directory\n",
2607 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2608 				proc_getpid(p), p->p_comm,
2609 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2610 				srfmp->vp->v_name));
2611 			error = EPERM;
2612 			goto done;
2613 		}
2614 scdir_ok:
2615 
2616 		/* get vnode size */
2617 		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2618 		if (error) {
2619 			SHARED_REGION_TRACE_ERROR(
2620 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2621 				"vnode_size(%p) failed (error=%d)\n",
2622 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2623 				proc_getpid(p), p->p_comm,
2624 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2625 				srfmp->vp->v_name,
2626 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2627 			goto done;
2628 		}
2629 		srfmp->file_size = fs;
2630 
2631 		/* get the file's memory object handle */
2632 		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2633 		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2634 			SHARED_REGION_TRACE_ERROR(
2635 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2636 				"no memory object\n",
2637 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2638 				proc_getpid(p), p->p_comm,
2639 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2640 				srfmp->vp->v_name));
2641 			error = EINVAL;
2642 			goto done;
2643 		}
2644 
2645 		/* check that the mappings are properly covered by code signatures */
2646 		if (!cs_system_enforcement()) {
2647 			/* code signing is not enforced: no need to check */
2648 		} else {
2649 			for (i = 0; i < srfmp->mappings_count; i++) {
2650 				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2651 					/* zero-filled mapping: not backed by the file */
2652 					continue;
2653 				}
2654 				if (ubc_cs_is_range_codesigned(srfmp->vp,
2655 				    srfmp->mappings[i].sms_file_offset,
2656 				    srfmp->mappings[i].sms_size)) {
2657 					/* this mapping is fully covered by code signatures */
2658 					continue;
2659 				}
2660 				SHARED_REGION_TRACE_ERROR(
2661 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2662 					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2663 					"is not code-signed\n",
2664 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2665 					proc_getpid(p), p->p_comm,
2666 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2667 					srfmp->vp->v_name,
2668 					i, srfmp->mappings_count,
2669 					srfmp->mappings[i].sms_address,
2670 					srfmp->mappings[i].sms_size,
2671 					srfmp->mappings[i].sms_file_offset,
2672 					srfmp->mappings[i].sms_max_prot,
2673 					srfmp->mappings[i].sms_init_prot));
2674 				error = EINVAL;
2675 				goto done;
2676 			}
2677 		}
2678 	}
2679 done:
2680 	if (error != 0) {
2681 		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2682 		*sr_file_mappings = NULL;
2683 		*shared_region_ptr = NULL;
2684 	}
2685 	return error;
2686 }
2687 
2688 /*
2689  * shared_region_map_np()
2690  *
2691  * This system call is intended for dyld.
2692  *
2693  * dyld uses this to map a shared cache file into a shared region.
2694  * This is usually done only the first time a shared cache is needed.
2695  * Subsequent processes will just use the populated shared region without
2696  * requiring any further setup.
2697  */
2698 static int
_shared_region_map_and_slide(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings)2699 _shared_region_map_and_slide(
2700 	struct proc                         *p,
2701 	uint32_t                            files_count,
2702 	struct shared_file_np               *files,
2703 	uint32_t                            mappings_count,
2704 	struct shared_file_mapping_slide_np *mappings)
2705 {
2706 	int                             error = 0;
2707 	kern_return_t                   kr = KERN_SUCCESS;
2708 	struct _sr_file_mappings        *sr_file_mappings = NULL;
2709 	struct vnode                    *rdir_vp = NULL;
2710 	struct vm_shared_region         *shared_region = NULL;
2711 
2712 	/*
2713 	 * Get a reference to the current proc's root dir.
2714 	 * Need this to prevent racing with chroot.
2715 	 */
2716 	proc_fdlock(p);
2717 	rdir_vp = p->p_fd.fd_rdir;
2718 	if (rdir_vp == NULL) {
2719 		rdir_vp = rootvnode;
2720 	}
2721 	assert(rdir_vp != NULL);
2722 	vnode_get(rdir_vp);
2723 	proc_fdunlock(p);
2724 
2725 	/*
2726 	 * Turn files, mappings into sr_file_mappings and other setup.
2727 	 */
2728 	error = shared_region_map_and_slide_setup(p, files_count,
2729 	    files, mappings_count, mappings,
2730 	    &sr_file_mappings, &shared_region, rdir_vp);
2731 	if (error != 0) {
2732 		vnode_put(rdir_vp);
2733 		return error;
2734 	}
2735 
2736 	/* map the file(s) into that shared region's submap */
2737 	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2738 	if (kr != KERN_SUCCESS) {
2739 		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2740 		    "vm_shared_region_map_file() failed kr=0x%x\n",
2741 		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2742 		    proc_getpid(p), p->p_comm, kr));
2743 	}
2744 
2745 	/* convert kern_return_t to errno */
2746 	switch (kr) {
2747 	case KERN_SUCCESS:
2748 		error = 0;
2749 		break;
2750 	case KERN_INVALID_ADDRESS:
2751 		error = EFAULT;
2752 		break;
2753 	case KERN_PROTECTION_FAILURE:
2754 		error = EPERM;
2755 		break;
2756 	case KERN_NO_SPACE:
2757 		error = ENOMEM;
2758 		break;
2759 	case KERN_FAILURE:
2760 	case KERN_INVALID_ARGUMENT:
2761 	default:
2762 		error = EINVAL;
2763 		break;
2764 	}
2765 
2766 	/*
2767 	 * Mark that this process is now using split libraries.
2768 	 */
2769 	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2770 		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2771 	}
2772 
2773 	vnode_put(rdir_vp);
2774 	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
2775 
2776 	SHARED_REGION_TRACE_DEBUG(
2777 		("shared_region: %p [%d(%s)] <- map\n",
2778 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2779 		proc_getpid(p), p->p_comm));
2780 
2781 	return error;
2782 }
2783 
2784 /*
2785  * Clean up part of _shared_region_map_and_slide()
2786  * It had to be broken out of _shared_region_map_and_slide() to
2787  * prevent compiler inlining from blowing out the stack.
2788  */
/*
 * Release the per-file resources acquired by
 * shared_region_map_and_slide_setup(): vnode and file references, the
 * _sr_file_mappings array, and the shared region reference.  Also sets
 * VSHARED_DYLD and refreshes the access time on each mapped vnode.
 */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc              *p,
	uint32_t                 files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region  *shared_region)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr        va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				/* flag the vnode as used for a dyld shared region */
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated for
				 * for it. That namedstream vnode will also
				 * have to be marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						/* drop the iocount from vnode_getnamedstream() */
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (shared_region != NULL) {
		/* drop the reference taken by vm_shared_region_trim_and_get() */
		vm_shared_region_deallocate(shared_region);
	}
}
2853 
2854 
/*
 * For each file mapped, we may have mappings for:
 *    TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
 * so let's round up to 8 mappings per file.
 */
#define SFM_MAX       (_SR_FILE_MAPPINGS_MAX_FILES * 8)     /* max mapping structs allowed to pass in */

/*
 * This is the new interface for setting up shared region mappings.
 *
 * The slide used for shared regions setup using this interface is done differently
 * from the old interface. The slide value passed in the shared_files_np represents
 * a max value. The kernel will choose a random value based on that, then use it
 * for all shared regions.
 */
/* mask that rounds a slide down to the platform's page granularity */
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
#endif
2875 
int
shared_region_map_and_slide_2_np(
	struct proc                                  *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int                                 *retvalp)
{
	unsigned int                  files_count;
	struct shared_file_np         *shared_files = NULL;
	unsigned int                  mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t                 kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/*
	 * Validate the file count and allocate a kernel copy of the
	 * caller's shared_file_np array.  The count is bounded by
	 * _SR_FILE_MAPPINGS_MAX_FILES before the multiply, so the
	 * allocation size cannot overflow.
	 */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * Same validation/allocation for the mapping array, bounded by
	 * SFM_MAX (8 mappings per allowed file).
	 */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	/* Copy in the user-supplied file and mapping descriptors. */
	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * Per the interface comment above, sf_slide of the first file is the
	 * maximum slide.  Pick one random slide below that maximum, rounded
	 * to page alignment by SLIDE_AMOUNT_MASK, and use it for all files.
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	/* honor the ASLR-disable boot-arg on dev/debug kernels */
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			/*
			 * The per-file sf_mappings_count values must not,
			 * in total, exceed the mappings array we copied in.
			 */
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* NOTE(review): relies on kfree_data() being a no-op for a NULL pointer — confirm */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2999 
3000 /*
3001  * A syscall for dyld to use to map data pages that need load time relocation fixups.
3002  * The fixups are performed by a custom pager during page-in, so the pages still appear
3003  * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
3004  * on demand later, all w/o using the compressor.
3005  *
3006  * Note these page are treated as MAP_PRIVATE. So if the application dirties any pages while
3007  * running, they are COW'd as normal.
3008  */
/*
 * Returns 0 on success, ENOMEM on allocation failure, EINVAL otherwise
 * (see the kr -> errno switch at the bottom).
 */
int
map_with_linking_np(
	struct proc                     *p,
	struct map_with_linking_np_args *uap,
	__unused int                    *retvalp)
{
	uint32_t                        region_count;
	uint32_t                        r;
	struct mwl_region               *regions = NULL;
	struct mwl_region               *rp;
	uint32_t                        link_info_size;
	void                            *link_info = NULL;      /* starts with a struct mwl_info_hdr */
	struct mwl_info_hdr             *info_hdr = NULL;
	uint64_t                        binds_size;
	int                             fd;
	struct fileproc                 *fp = NULL;
	struct vnode                    *vp = NULL;
	size_t                          file_size;
	off_t                           fs;
	struct vnode_attr               va;
	memory_object_control_t         file_control = NULL;
	int                             error;
	kern_return_t                   kr = KERN_SUCCESS;

	/*
	 * Check if dyld has told us it finished with this call.
	 */
	if (p->p_disallow_map_with_linking) {
		printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * First we do some sanity checking on what dyld has passed us.
	 */
	region_count = uap->region_count;
	link_info_size = uap->link_info_size;
	if (region_count == 0) {
		printf("%s: [%d(%s)]: region_count == 0\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}
	if (region_count > MWL_MAX_REGION_COUNT) {
		printf("%s: [%d(%s)]: region_count too big %d\n",
		    __func__, proc_getpid(p), p->p_comm, region_count);
		kr = KERN_FAILURE;
		goto done;
	}

	if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
		printf("%s: [%d(%s)]: link_info_size too small\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}
	if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
		printf("%s: [%d(%s)]: link_info_size too big %d\n",
		    __func__, proc_getpid(p), p->p_comm, link_info_size);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * Allocate and copyin the regions and link info
	 * (region_count was bounded above, so the size multiply is safe).
	 */
	regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
	if (regions == NULL) {
		printf("%s: [%d(%s)]: failed to allocate regions\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}
	kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
	if (kr != KERN_SUCCESS) {
		printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
		    __func__, proc_getpid(p), p->p_comm, kr);
		goto done;
	}

	link_info = kalloc_data(link_info_size, Z_WAITOK);
	if (link_info == NULL) {
		printf("%s: [%d(%s)]: failed to allocate link_info\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}
	kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
	if (kr != KERN_SUCCESS) {
		printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
		    __func__, proc_getpid(p), p->p_comm, kr);
		goto done;
	}

	/*
	 * Do some verification the data structures.
	 */
	info_hdr = (struct mwl_info_hdr *)link_info;
	if (info_hdr->mwli_version != MWL_INFO_VERS) {
		printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
		kr = KERN_FAILURE;
		goto done;
	}

	if (info_hdr->mwli_binds_offset > link_info_size) {
		printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
		kr = KERN_FAILURE;
		goto done;
	}

	/* some older devs have s/w page size > h/w page size, no need to support them */
	if (info_hdr->mwli_page_size != PAGE_SIZE) {
		/* no printf, since this is expected on some devices */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/*
	 * Bind entries are 4 bytes for DYLD_CHAINED_PTR_32, else 8.
	 * mwli_binds_offset <= link_info_size was verified above, so the
	 * subtraction below cannot underflow.
	 */
	binds_size = (uint64_t)info_hdr->mwli_binds_count *
	    ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
	if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
		printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
		kr = KERN_FAILURE;
		goto done;
	}

	if (info_hdr->mwli_chains_offset > link_info_size) {
		printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
		kr = KERN_FAILURE;
		goto done;
	}


	/*
	 * Ensure the chained starts in the link info and make sure the
	 * segment info offsets are within bounds.
	 */
	if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
		printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
		kr = KERN_FAILURE;
		goto done;
	}
	if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
		printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
		kr = KERN_FAILURE;
		goto done;
	}

	/* Note that more verification of offsets is done in the pager itself */

	/*
	 * Ensure we've only been given one FD and verify valid protections.
	 */
	fd = regions[0].mwlr_fd;
	for (r = 0; r < region_count; ++r) {
		if (regions[r].mwlr_fd != fd) {
			printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
			    __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * Only allow data mappings and not zero fill. Permit TPRO
		 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
		 */
		if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
			printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
			    __func__, proc_getpid(p), p->p_comm);
			kr = KERN_FAILURE;
			goto done;
		}
		if (regions[r].mwlr_protections & VM_PROT_ZF) {
			printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
			    __func__, proc_getpid(p), p->p_comm, r);
			kr = KERN_FAILURE;
			goto done;
		}
		if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
		    !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
			printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
			    __func__, proc_getpid(p), p->p_comm, r);
			kr = KERN_FAILURE;
			goto done;
		}
	}


	/* get file structure from file descriptor */
	error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
	if (error) {
		printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
		    __func__, proc_getpid(p), p->p_comm, error);
		kr = KERN_FAILURE;
		goto done;
	}

	/* We need at least read permission on the file */
	if (!(fp->fp_glob->fg_flag & FREAD)) {
		printf("%s: [%d(%s)]: not readable\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* Get the vnode from file structure */
	vp = (struct vnode *)fp_get_data(fp);
	error = vnode_getwithref(vp);
	if (error) {
		printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
		    __func__, proc_getpid(p), p->p_comm, error);
		kr = KERN_FAILURE;
		vp = NULL; /* just to be sure */
		goto done;
	}

	/* Make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		printf("%s: [%d(%s)]: vnode not VREG\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		/* NOTE(review): kr is still KERN_SUCCESS here; caller gets 0 despite the failure — confirm intent */
		goto done;
	}
	/* NOTE(review): file_size is assigned but never used below — candidate for removal */
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		printf("%s: [%d(%s)]: no memory object\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	for (r = 0; r < region_count; ++r) {
		rp = &regions[r];

#if CONFIG_MACF
		/* ask the MAC framework whether this private fixed mapping is allowed */
		vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
		if (error) {
			printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
			    __func__, proc_getpid(p), p->p_comm, r, error);
			kr = KERN_FAILURE;
			goto done;
		}
#endif /* MAC */

		/* check that the mappings are properly covered by code signatures */
		if (cs_system_enforcement()) {
			if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
				printf("%s: [%d(%s)]: region %d, not code signed\n",
				    __func__, proc_getpid(p), p->p_comm, r);
				kr = KERN_FAILURE;
				goto done;
			}
		}
	}

	/* update the vnode's access time */
	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	/* get the VM to do the work */
	kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);

done:
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
	}
	if (vp != NULL) {
		(void)vnode_put(vp);
	}
	if (regions != NULL) {
		kfree_data(regions, region_count * sizeof(regions[0]));
	}
	/* link info is used in the pager if things worked */
	if (link_info != NULL && kr != KERN_SUCCESS) {
		kfree_data(link_info, link_info_size);
	}

	/* map kern_return_t to the errno actually returned to userspace */
	switch (kr) {
	case KERN_SUCCESS:
		return 0;
	case KERN_RESOURCE_SHORTAGE:
		return ENOMEM;
	default:
		return EINVAL;
	}
}
3318 
/* Counts of live dyld pagers created by map_with_linking_np(), dev/debug only. */
#if DEBUG || DEVELOPMENT
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 *       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 *       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int     vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3341 static int
3342 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3343 {
3344 #pragma unused(oidp, arg1, arg2)
3345 	unsigned int page_free_wanted;
3346 
3347 	page_free_wanted = mach_vm_ctl_page_free_wanted();
3348 	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3349 }
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

/* Purgeable and kernel large-page counters. */
extern unsigned int     vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int     vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");

/* Dev/debug-only pageout instrumentation. */
#if DEVELOPMENT || DEBUG
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");         /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, "");         /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */

/* Debug knobs: force (or sometimes force) zero-fill on MADV_FREE* paths. */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");

/* Reusable-page statistics (vm_page_stats_reusable). */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3440 
3441 
/* Global page-queue counters exposed read-only. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

/* Shared-cache and realtime-thread pageout protection counters. */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3477 
/* x86_64-only free-page "clump" allocator statistics. */
#if defined (__x86_64__)
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif  /* if DEVELOPMENT || DEBUG */
#endif  /* #if defined (__x86_64__) */
3506 
#if CONFIG_SECLUDED_MEMORY

/* Secluded-memory pool counters (vm_page_secluded*). */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

/* Secluded-page grab outcome statistics. */
extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3533 
#pragma mark Deferred Reclaim

#if CONFIG_DEFERRED_RECLAIM

#if DEVELOPMENT || DEBUG
/*
 * VM reclaim testing
 */
/* Blocks until the given pid's deferred-reclamation work has completed; returns success. */
extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
3543 
3544 static int
3545 sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
3546 {
3547 #pragma unused(arg1, arg2)
3548 	int error = EINVAL, pid = 0;
3549 	/*
3550 	 * Only send on write
3551 	 */
3552 	error = sysctl_handle_int(oidp, &pid, 0, req);
3553 	if (error || !req->newptr) {
3554 		return error;
3555 	}
3556 
3557 	bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
3558 	if (success) {
3559 		error = 0;
3560 	}
3561 
3562 	return error;
3563 }
3564 
SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
    &sysctl_vm_reclaim_drain_async_queue, "I", "");


/* Tunables for the deferred-reclaim thresholds, dev/debug only. */
extern uint64_t vm_reclaim_max_threshold;
extern uint64_t vm_reclaim_trim_divisor;

SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
#endif /* DEVELOPMENT || DEBUG */

#endif /* CONFIG_DEFERRED_RECLAIM */
3578 
3579 #include <kern/thread.h>
3580 #include <sys/user.h>
3581 
3582 void vm_pageout_io_throttle(void);
3583 
3584 void
vm_pageout_io_throttle(void)3585 vm_pageout_io_throttle(void)
3586 {
3587 	struct uthread *uthread = current_uthread();
3588 
3589 	/*
3590 	 * thread is marked as a low priority I/O type
3591 	 * and the I/O we issued while in this cleaning operation
3592 	 * collided with normal I/O operations... we'll
3593 	 * delay in order to mitigate the impact of this
3594 	 * task on the normal operation of the system
3595 	 */
3596 
3597 	if (uthread->uu_lowpri_window) {
3598 		throttle_lowpri_io(1);
3599 	}
3600 }
3601 
3602 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3603 vm_pressure_monitor(
3604 	__unused struct proc *p,
3605 	struct vm_pressure_monitor_args *uap,
3606 	int *retval)
3607 {
3608 	kern_return_t   kr;
3609 	uint32_t        pages_reclaimed;
3610 	uint32_t        pages_wanted;
3611 
3612 	kr = mach_vm_pressure_monitor(
3613 		(boolean_t) uap->wait_for_pressure,
3614 		uap->nsecs_monitored,
3615 		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3616 		&pages_wanted);
3617 
3618 	switch (kr) {
3619 	case KERN_SUCCESS:
3620 		break;
3621 	case KERN_ABORTED:
3622 		return EINTR;
3623 	default:
3624 		return EINVAL;
3625 	}
3626 
3627 	if (uap->pages_reclaimed) {
3628 		if (copyout((void *)&pages_reclaimed,
3629 		    uap->pages_reclaimed,
3630 		    sizeof(pages_reclaimed)) != 0) {
3631 			return EFAULT;
3632 		}
3633 	}
3634 
3635 	*retval = (int) pages_wanted;
3636 	return 0;
3637 }
3638 
/*
 * kas_info(): return kernel-address-space information to a privileged
 * caller.  `uap->size` is value/result: the caller supplies its buffer
 * size and receives the number of bytes the selector produced.
 * Returns ENOTSUP when not configured, EPERM for non-superuser callers,
 * EINVAL for bad selectors/sizes, or a copyin/copyout error.
 */
int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifndef CONFIG_KAS_INFO
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* CONFIG_KAS_INFO */
	int                     selector = uap->selector;
	user_addr_t     valuep = uap->value;
	user_addr_t     sizep = uap->size;
	user_size_t size, rsize;
	int                     error;

	/* Only the superuser may learn kernel layout details. */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	/* Copy in the caller's buffer size, honoring its pointer width. */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		/* The KASLR slide: caller's buffer must be exactly 8 bytes. */
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
		rsize = size;
	}
	break;
	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
	{
		uint32_t i;
		kernel_mach_header_t *mh = &_mh_execute_header;
		struct load_command *cmd;
		cmd = (struct load_command*) &mh[1];
		uint64_t *bases;
		/* One uint64_t slot per load command, segment or not. */
		rsize = mh->ncmds * sizeof(uint64_t);

		/*
		 * Return the size if no data was passed
		 */
		if (valuep == 0) {
			break;
		}

		if (rsize > size) {
			return EINVAL;
		}

		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);

		/*
		 * Walk the kernel Mach-O load commands; non-LC_SEGMENT_KERNEL
		 * entries stay zero (buffer was Z_ZERO'd).
		 */
		for (i = 0; i < mh->ncmds; i++) {
			if (cmd->cmd == LC_SEGMENT_KERNEL) {
				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
				bases[i] = (uint64_t)sg->vmaddr;
			}
			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
		}

		error = copyout(bases, valuep, rsize);

		kfree_data(bases, rsize);

		if (error) {
			return error;
		}
	}
	break;
	default:
		return EINVAL;
	}

	/* Report back the produced byte count in the caller's size width. */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64 = (user64_size_t)rsize;
		error = copyout(&size64, sizep, sizeof(size64));
	} else {
		user32_size_t size32 = (user32_size_t)rsize;
		error = copyout(&size32, sizep, sizeof(size32));
	}

	return error;
#endif /* CONFIG_KAS_INFO */
}
3749 
3750 #if __has_feature(ptrauth_calls)
3751 /*
3752  * Generate a random pointer signing key that isn't 0.
3753  */
3754 uint64_t
generate_jop_key(void)3755 generate_jop_key(void)
3756 {
3757 	uint64_t key;
3758 
3759 	do {
3760 		read_random(&key, sizeof key);
3761 	} while (key == 0);
3762 	return key;
3763 }
3764 #endif /* __has_feature(ptrauth_calls) */
3765 
3766 
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#pragma clang diagnostic ignored "-Wunused-function"

/*
 * Compile-time check that the two kernel address bounds really are
 * unsigned-long sized, since the sysctls below publish them via
 * SYSCTL_ULONG with an (unsigned long *) cast.
 */
static void
asserts()
{
	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}

SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop
3781 
/* Read-only page counters exported under vm.* for diagnostics. */
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3793 
#if __arm64__ && (DEVELOPMENT || DEBUG)
/* Gate for the vm.footprint_suspend sysctl below (suspends only). */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3799 static int
3800 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3801 {
3802 #pragma unused(oidp, arg1, arg2)
3803 	int error = 0;
3804 	int new_value;
3805 
3806 	if (req->newptr == USER_ADDR_NULL) {
3807 		return 0;
3808 	}
3809 	error = SYSCTL_IN(req, &new_value, sizeof(int));
3810 	if (error) {
3811 		return error;
3812 	}
3813 	if (!vm_footprint_suspend_allowed) {
3814 		if (new_value != 0) {
3815 			/* suspends are not allowed... */
3816 			return 0;
3817 		}
3818 		/* ... but let resumes proceed */
3819 	}
3820 	DTRACE_VM2(footprint_suspend,
3821 	    vm_map_t, current_map(),
3822 	    int, new_value);
3823 
3824 	pmap_footprint_suspend(current_map(), new_value);
3825 
3826 	return 0;
3827 }
/* Any process may suspend/resume its own footprint accounting. */
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
3831 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3832 
/* Corpse-footprint collection statistics (read-only). */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");

#if CODE_SIGNING_MONITOR
/* Counters for code-signing decisions deferred (or not) to the CSM. */
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */

/* Shared-region pager page statistics (read-only). */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
/* Tunable delay (seconds) before tearing down an unused shared region. */
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
/* Tolerance before panicking on pmap ledger imbalance (debug builds). */
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
3878 
3879 
/*
 * vm_map_lookup_and_lock_object() copy-path statistics: counts, total
 * sizes, maxima, restarts and errors for the slow/strategic/shadow
 * copy strategies (all read-only).
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");

extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Current and peak number of live shared regions. */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* Pointer-auth builds: per-key shared region statistics. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */
3952 
#if MACH_ASSERT
/* debug4k (4K-page-on-16K) debugging knobs (MACH_ASSERT builds only). */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */

/* Counters for address-space / data size limit settings and hits. */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/* Resilient-media fault path statistics (read-only). */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/* Error-injection rates/counters for resilient-media testing. */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
4002 
4003 /*
4004  * A sysctl which causes all existing shared regions to become stale. They
4005  * will no longer be used by anything new and will be torn down as soon as
4006  * the last existing user exits. A write of non-zero value causes that to happen.
4007  * This should only be used by launchd, so we check that this is initproc.
4008  */
4009 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4010 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4011 {
4012 	unsigned int value = 0;
4013 	int changed = 0;
4014 	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
4015 	if (error || !changed) {
4016 		return error;
4017 	}
4018 	if (current_proc() != initproc) {
4019 		return EPERM;
4020 	}
4021 
4022 	vm_shared_region_pivot();
4023 
4024 	return 0;
4025 }
4026 
/* See shared_region_pivot() above: write-only, launchd-restricted. */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* Counters for forced vs. skipped VM object shadowing. */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
4040 
4041 
4042 #if DEBUG || DEVELOPMENT
4043 /*
4044  * A sysctl that can be used to corrupt a text page with an illegal instruction.
4045  * Used for testing text page self healing.
4046  */
4047 extern kern_return_t vm_corrupt_text_addr(uintptr_t);
4048 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4049 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4050 {
4051 	uint64_t value = 0;
4052 	int error = sysctl_handle_quad(oidp, &value, 0, req);
4053 	if (error || !req->newptr) {
4054 		return error;
4055 	}
4056 
4057 	if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
4058 		return 0;
4059 	} else {
4060 		return EINVAL;
4061 	}
4062 }
4063 
/* Write-only test hook; see corrupt_text_addr() above. */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
4067 #endif /* DEBUG || DEVELOPMENT */
4068 
4069 #if CONFIG_MAP_RANGES
4070 /*
4071  * vm.malloc_ranges
4072  *
4073  * space-separated list of <left:right> hexadecimal addresses.
4074  */
4075 static int
4076 vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
4077 {
4078 	vm_map_t map = current_map();
4079 	struct mach_vm_range r1, r2;
4080 	char str[20 * 4];
4081 	int len;
4082 
4083 	if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
4084 		return ENOENT;
4085 	}
4086 	if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
4087 		return ENOENT;
4088 	}
4089 
4090 	len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
4091 	    r1.max_address, r2.min_address,
4092 	    r2.max_address, get_map_max(map));
4093 
4094 	return SYSCTL_OUT(req, str, len);
4095 }
4096 
/* Read-only string; see vm_map_malloc_ranges() above. */
SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &vm_map_malloc_ranges, "A", "");
4100 
4101 #if DEBUG || DEVELOPMENT
4102 static int
4103 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4104 {
4105 #pragma unused(arg1, arg2, oidp)
4106 	struct mach_vm_range range;
4107 
4108 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4109 	    != KERN_SUCCESS) {
4110 		return EINVAL;
4111 	}
4112 
4113 	return SYSCTL_OUT(req, &range, sizeof(range));
4114 }
4115 
4116 static int
4117 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4118 {
4119 #pragma unused(arg1, arg2, oidp)
4120 	struct mach_vm_range range;
4121 
4122 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4123 	    != KERN_SUCCESS) {
4124 		return EINVAL;
4125 	}
4126 
4127 	return SYSCTL_OUT(req, &range, sizeof(range));
4128 }
4129 
/*
 * A sysctl that can be used to return ranges for the current VM map.
 * Used for testing VM ranges.
 */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
4138 
4139 #endif /* DEBUG || DEVELOPMENT */
4140 #endif /* CONFIG_MAP_RANGES */
4141 
4142 #if DEBUG || DEVELOPMENT
4143 #endif /* DEBUG || DEVELOPMENT */
4144 
extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
/*
 * NOTE: "oveflows" (sic) is the shipped sysctl name; keep the typo for
 * ABI compatibility with existing tooling.
 */
SYSCTL_INT(_vm, OID_AUTO, map_range_oveflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");

/* Compressor segment-fill contention statistics. */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/* Compressor major-compaction reporting/yield knobs and statistics. */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4182 
4183