xref: /xnu-10063.121.3/bsd/vm/vm_unix.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Mach Operating System
30  * Copyright (c) 1987 Carnegie-Mellon University
31  * All rights reserved.  The CMU software License Agreement specifies
32  * the terms and conditions for use and redistribution.
33  */
34 /*
35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36  * support for mandatory and extensible security protections.  This notice
37  * is included in support of clause 2.2 (b) of the Apple Public License,
38  * Version 2.0.
39  */
40 #include <vm/vm_options.h>
41 
42 #include <kern/ecc.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/extmod_statistics.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/sdt.h>
50 #include <mach/task.h>
51 #include <mach/task_access.h>
52 #include <mach/task_special_ports.h>
53 #include <mach/time_value.h>
54 #include <mach/vm_map.h>
55 #include <mach/vm_param.h>
56 #include <mach/vm_prot.h>
57 #include <machine/machine_routines.h>
58 
59 #include <sys/file_internal.h>
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/dir.h>
63 #include <sys/namei.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/vm.h>
67 #include <sys/file.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/mount.h>
70 #include <sys/xattr.h>
71 #include <sys/trace.h>
72 #include <sys/kernel.h>
73 #include <sys/ubc_internal.h>
74 #include <sys/user.h>
75 #include <sys/syslog.h>
76 #include <sys/stat.h>
77 #include <sys/sysproto.h>
78 #include <sys/mman.h>
79 #include <sys/sysctl.h>
80 #include <sys/cprotect.h>
81 #include <sys/kpi_socket.h>
82 #include <sys/kas_info.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/random.h>
86 #include <sys/code_signing.h>
87 #if NECP
88 #include <net/necp.h>
89 #endif /* NECP */
90 #if SKYWALK
91 #include <skywalk/os_channel.h>
92 #endif /* SKYWALK */
93 
94 #include <security/audit/audit.h>
95 #include <security/mac.h>
96 #include <bsm/audit_kevents.h>
97 
98 #include <kern/kalloc.h>
99 #include <vm/vm_map.h>
100 #include <vm/vm_kern.h>
101 #include <vm/vm_pageout.h>
102 
103 #include <mach/shared_region.h>
104 #include <vm/vm_shared_region.h>
105 
106 #include <vm/vm_dyld_pager.h>
107 
108 #include <vm/vm_protos.h>
109 
110 #include <sys/kern_memorystatus.h>
111 #include <sys/kern_memorystatus_freeze.h>
112 #include <sys/proc_internal.h>
113 
114 #include <mach-o/fixup-chains.h>
115 
116 #if CONFIG_MACF
117 #include <security/mac_framework.h>
118 #endif
119 
120 #include <kern/bits.h>
121 
122 #if CONFIG_CSR
123 #include <sys/csr.h>
124 #endif /* CONFIG_CSR */
125 #include <sys/trust_caches.h>
126 #include <libkern/amfi/amfi.h>
127 #include <IOKit/IOBSD.h>
128 
129 #if VM_MAP_DEBUG_APPLE_PROTECT
130 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
131 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
132 
133 #if VM_MAP_DEBUG_FOURK
134 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
135 #endif /* VM_MAP_DEBUG_FOURK */
136 
137 #if DEVELOPMENT || DEBUG
138 
139 static int
140 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
141 {
142 #pragma unused(arg1, arg2)
143 	vm_offset_t     kaddr;
144 	kern_return_t   kr;
145 	int     error = 0;
146 	int     size = 0;
147 
148 	error = sysctl_handle_int(oidp, &size, 0, req);
149 	if (error || !req->newptr) {
150 		return error;
151 	}
152 
153 	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
154 	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
155 
156 	if (kr == KERN_SUCCESS) {
157 		kmem_free(kernel_map, kaddr, size);
158 	}
159 
160 	return error;
161 }
162 
163 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
164     0, 0, &sysctl_kmem_alloc_contig, "I", "");
165 
166 extern int vm_region_footprint;
167 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
168 
169 static int
170 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
171 {
172 #pragma unused(arg1, arg2, oidp)
173 	kmem_gobj_stats stats = kmem_get_gobj_stats();
174 
175 	return SYSCTL_OUT(req, &stats, sizeof(stats));
176 }
177 
178 SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
179     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
180     0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
181 
182 #endif /* DEVELOPMENT || DEBUG */
183 
184 static int
185 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
186 {
187 #pragma unused(arg1, arg2, oidp)
188 	int     error = 0;
189 	int     value;
190 
191 	value = task_self_region_footprint();
192 	error = SYSCTL_OUT(req, &value, sizeof(int));
193 	if (error) {
194 		return error;
195 	}
196 
197 	if (!req->newptr) {
198 		return 0;
199 	}
200 
201 	error = SYSCTL_IN(req, &value, sizeof(int));
202 	if (error) {
203 		return error;
204 	}
205 	task_self_region_footprint_set(value);
206 	return 0;
207 }
208 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
209 
210 static int
211 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
212 {
213 #pragma unused(arg1, arg2, oidp)
214 	int     error = 0;
215 	int     value;
216 
217 	value = (1 << thread_self_region_page_shift());
218 	error = SYSCTL_OUT(req, &value, sizeof(int));
219 	if (error) {
220 		return error;
221 	}
222 
223 	if (!req->newptr) {
224 		return 0;
225 	}
226 
227 	error = SYSCTL_IN(req, &value, sizeof(int));
228 	if (error) {
229 		return error;
230 	}
231 
232 	if (value != 0 && value != 4096 && value != 16384) {
233 		return EINVAL;
234 	}
235 
236 #if !__ARM_MIXED_PAGE_SIZE__
237 	if (value != vm_map_page_size(current_map())) {
238 		return EINVAL;
239 	}
240 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
241 
242 	thread_self_region_page_shift_set(bit_first(value));
243 	return 0;
244 }
245 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
246 
247 
248 #if DEVELOPMENT || DEBUG
249 extern int panic_on_unsigned_execute;
250 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
251 
252 extern int vm_log_xnu_user_debug;
253 SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
254 #endif /* DEVELOPMENT || DEBUG */
255 
256 extern int cs_executable_create_upl;
257 extern int cs_executable_wire;
258 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
259 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
260 
261 extern int apple_protect_pager_count;
262 extern int apple_protect_pager_count_mapped;
263 extern unsigned int apple_protect_pager_cache_limit;
264 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
265 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
266 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
267 
268 #if DEVELOPMENT || DEBUG
269 extern int radar_20146450;
270 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
271 
272 extern int macho_printf;
273 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
274 
275 extern int apple_protect_pager_data_request_debug;
276 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
277 
278 #if __arm64__
279 /* These are meant to support the page table accounting unit test. */
280 extern unsigned int arm_hardware_page_size;
281 extern unsigned int arm_pt_desc_size;
282 extern unsigned int arm_pt_root_size;
283 extern unsigned int inuse_user_tteroot_count;
284 extern unsigned int inuse_kernel_tteroot_count;
285 extern unsigned int inuse_user_ttepages_count;
286 extern unsigned int inuse_kernel_ttepages_count;
287 extern unsigned int inuse_user_ptepages_count;
288 extern unsigned int inuse_kernel_ptepages_count;
289 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
290 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
291 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
292 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
293 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
294 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
295 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
296 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
297 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
298 #if !CONFIG_SPTM
299 extern unsigned int free_page_size_tt_count;
300 extern unsigned int free_two_page_size_tt_count;
301 extern unsigned int free_tt_count;
302 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
303 SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
304 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
305 #endif
306 #if DEVELOPMENT || DEBUG
307 extern unsigned long pmap_asid_flushes;
308 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
309 extern unsigned long pmap_asid_hits;
310 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
311 extern unsigned long pmap_asid_misses;
312 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
313 #endif
314 #endif /* __arm64__ */
315 
316 #if __arm64__
317 extern int fourk_pager_data_request_debug;
318 SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
319 #endif /* __arm64__ */
320 #endif /* DEVELOPMENT || DEBUG */
321 
322 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
323 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
324 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
325 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
326 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
327 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
328 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
329 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
330 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
331 #if VM_SCAN_FOR_SHADOW_CHAIN
332 static int vm_shadow_max_enabled = 0;    /* Disabled by default */
333 extern int proc_shadow_max(void);
334 static int
335 vm_shadow_max SYSCTL_HANDLER_ARGS
336 {
337 #pragma unused(arg1, arg2, oidp)
338 	int value = 0;
339 
340 	if (vm_shadow_max_enabled) {
341 		value = proc_shadow_max();
342 	}
343 
344 	return SYSCTL_OUT(req, &value, sizeof(value));
345 }
346 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
347     0, 0, &vm_shadow_max, "I", "");
348 
349 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
350 
351 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
352 
353 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
354 
355 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
356 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
357 /*
358  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
359  */
360 
361 #if DEVELOPMENT || DEBUG
362 extern int allow_stack_exec, allow_data_exec;
363 
364 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
365 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
366 
367 #endif /* DEVELOPMENT || DEBUG */
368 
/*
 * Printable names for the eight combinations of the three low protection
 * bits, indexed by the bit pattern itself.  As the entries show, bit 0 is
 * read, bit 1 is write and bit 2 is execute.
 */
static const char *prot_values[] = {
	[0] = "none",
	[1] = "read-only",
	[2] = "write-only",
	[3] = "read-write",
	[4] = "execute-only",
	[5] = "read-execute",
	[6] = "write-execute",
	[7] = "read-write-execute"
};
379 
380 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)381 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
382 {
383 	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
384 	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
385 }
386 
/*
 * shared_region_unnest_logging: level of logging of unnesting events
 * 0	- no logging
 * 1	- throttled logging of unexpected unnesting events (default)
 * 2	- unthrottled logging of unexpected unnesting events
 * 3+	- unthrottled logging of all unnesting events
 *
 * The level is read by log_unnest_badness() and can be changed at run time
 * through the vm.shared_region_unnest_logging sysctl registered below.
 */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

/*
 * Throttling parameters used by log_unnest_badness() at level <= 1:
 * the interval is compared against a difference of timeval seconds, and
 * the threshold against the number of events counted inside one interval.
 */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
401 
402 
/*
 * scdir_enforce is the variable behind the vm.enforce_shared_cache_dir
 * sysctl (registered further down unless SECURE_KERNEL is defined).  It
 * defaults to 1 only on x86_64 macOS builds.
 *
 * scdir_path and driverkit_scdir_path are NULL-terminated lists of
 * directory paths.  Their consumers are not in this part of the file;
 * presumably they are the directories a shared cache file is expected to
 * live in -- TODO confirm against the users of these tables.
 */
#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* DriverKit counterpart of scdir_path; only the preboot entry differs per platform. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
440 
441 #ifndef SECURE_KERNEL
442 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
443 {
444 #if CONFIG_CSR
445 	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
446 		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
447 		return EPERM;
448 	}
449 #endif /* CONFIG_CSR */
450 	return sysctl_handle_int(oidp, arg1, arg2, req);
451 }
452 
453 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
454 #endif
455 
/*
 * Rate-throttling state for log_unnest_badness(): the tv_sec value at which
 * the current throttling interval started, and the number of events counted
 * since then.  These variables aren't thread safe (they are updated without
 * any locking), but are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;
461 
462 void
log_unnest_badness(vm_map_t m,vm_map_offset_t s,vm_map_offset_t e,boolean_t is_nested_map,vm_map_offset_t lowest_unnestable_addr)463 log_unnest_badness(
464 	vm_map_t        m,
465 	vm_map_offset_t s,
466 	vm_map_offset_t e,
467 	boolean_t       is_nested_map,
468 	vm_map_offset_t lowest_unnestable_addr)
469 {
470 	struct timeval  tv;
471 
472 	if (shared_region_unnest_logging == 0) {
473 		return;
474 	}
475 
476 	if (shared_region_unnest_logging <= 2 &&
477 	    is_nested_map &&
478 	    s >= lowest_unnestable_addr) {
479 		/*
480 		 * Unnesting of writable map entries is fine.
481 		 */
482 		return;
483 	}
484 
485 	if (shared_region_unnest_logging <= 1) {
486 		microtime(&tv);
487 		if ((tv.tv_sec - last_unnest_log_time) <
488 		    vm_shared_region_unnest_log_interval) {
489 			if (shared_region_unnest_log_count++ >
490 			    shared_region_unnest_log_count_threshold) {
491 				return;
492 			}
493 		} else {
494 			last_unnest_log_time = tv.tv_sec;
495 			shared_region_unnest_log_count = 0;
496 		}
497 	}
498 
499 	DTRACE_VM4(log_unnest_badness,
500 	    vm_map_t, m,
501 	    vm_map_offset_t, s,
502 	    vm_map_offset_t, e,
503 	    vm_map_offset_t, lowest_unnestable_addr);
504 	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
505 }
506 
/*
 * Purge the apple-protect, shared-region and dyld pagers, in that order.
 *
 * Returns the sum of the three *_purge_all() results.  On DEVELOPMENT and
 * DEBUG kernels the total is also printed.
 */
uint64_t
vm_purge_filebacked_pagers(void)
{
	uint64_t total;

	/* Separate statements keep the purge order fixed. */
	total = apple_protect_pager_purge_all();
	total += shared_region_pager_purge_all();
	total += dyld_pager_purge_all();
#if DEVELOPMENT || DEBUG
	printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, total);
#endif /* DEVELOPMENT || DEBUG */
	return total;
}
521 
522 int
useracc(user_addr_t addr,user_size_t len,int prot)523 useracc(
524 	user_addr_t     addr,
525 	user_size_t     len,
526 	int     prot)
527 {
528 	vm_map_t        map;
529 
530 	map = current_map();
531 	return vm_map_check_protection(
532 		map,
533 		vm_map_trunc_page(addr,
534 		vm_map_page_mask(map)),
535 		vm_map_round_page(addr + len,
536 		vm_map_page_mask(map)),
537 		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
538 }
539 
540 int
vslock(user_addr_t addr,user_size_t len)541 vslock(
542 	user_addr_t     addr,
543 	user_size_t     len)
544 {
545 	kern_return_t   kret;
546 	vm_map_t        map;
547 
548 	map = current_map();
549 	kret = vm_map_wire_kernel(map,
550 	    vm_map_trunc_page(addr,
551 	    vm_map_page_mask(map)),
552 	    vm_map_round_page(addr + len,
553 	    vm_map_page_mask(map)),
554 	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
555 	    FALSE);
556 
557 	switch (kret) {
558 	case KERN_SUCCESS:
559 		return 0;
560 	case KERN_INVALID_ADDRESS:
561 	case KERN_NO_SPACE:
562 		return ENOMEM;
563 	case KERN_PROTECTION_FAILURE:
564 		return EACCES;
565 	default:
566 		return EINVAL;
567 	}
568 }
569 
570 int
vsunlock(user_addr_t addr,user_size_t len,__unused int dirtied)571 vsunlock(
572 	user_addr_t addr,
573 	user_size_t len,
574 	__unused int dirtied)
575 {
576 #if FIXME  /* [ */
577 	pmap_t          pmap;
578 	vm_page_t       pg;
579 	vm_map_offset_t vaddr;
580 	ppnum_t         paddr;
581 #endif  /* FIXME ] */
582 	kern_return_t   kret;
583 	vm_map_t        map;
584 
585 	map = current_map();
586 
587 #if FIXME  /* [ */
588 	if (dirtied) {
589 		pmap = get_task_pmap(current_task());
590 		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
591 		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
592 		    vaddr += PAGE_SIZE) {
593 			paddr = pmap_find_phys(pmap, vaddr);
594 			pg = PHYS_TO_VM_PAGE(paddr);
595 			vm_page_set_modified(pg);
596 		}
597 	}
598 #endif  /* FIXME ] */
599 #ifdef  lint
600 	dirtied++;
601 #endif  /* lint */
602 	kret = vm_map_unwire(map,
603 	    vm_map_trunc_page(addr,
604 	    vm_map_page_mask(map)),
605 	    vm_map_round_page(addr + len,
606 	    vm_map_page_mask(map)),
607 	    FALSE);
608 	switch (kret) {
609 	case KERN_SUCCESS:
610 		return 0;
611 	case KERN_INVALID_ADDRESS:
612 	case KERN_NO_SPACE:
613 		return ENOMEM;
614 	case KERN_PROTECTION_FAILURE:
615 		return EACCES;
616 	default:
617 		return EINVAL;
618 	}
619 }
620 
621 int
subyte(user_addr_t addr,int byte)622 subyte(
623 	user_addr_t addr,
624 	int byte)
625 {
626 	char character;
627 
628 	character = (char)byte;
629 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
630 }
631 
632 int
suibyte(user_addr_t addr,int byte)633 suibyte(
634 	user_addr_t addr,
635 	int byte)
636 {
637 	char character;
638 
639 	character = (char)byte;
640 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
641 }
642 
643 int
fubyte(user_addr_t addr)644 fubyte(user_addr_t addr)
645 {
646 	unsigned char byte;
647 
648 	if (copyin(addr, (void *) &byte, sizeof(char))) {
649 		return -1;
650 	}
651 	return byte;
652 }
653 
654 int
fuibyte(user_addr_t addr)655 fuibyte(user_addr_t addr)
656 {
657 	unsigned char byte;
658 
659 	if (copyin(addr, (void *) &(byte), sizeof(char))) {
660 		return -1;
661 	}
662 	return byte;
663 }
664 
665 int
suword(user_addr_t addr,long word)666 suword(
667 	user_addr_t addr,
668 	long word)
669 {
670 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
671 }
672 
673 long
fuword(user_addr_t addr)674 fuword(user_addr_t addr)
675 {
676 	long word = 0;
677 
678 	if (copyin(addr, (void *) &word, sizeof(int))) {
679 		return -1;
680 	}
681 	return word;
682 }
683 
684 /* suiword and fuiword are the same as suword and fuword, respectively */
685 
686 int
suiword(user_addr_t addr,long word)687 suiword(
688 	user_addr_t addr,
689 	long word)
690 {
691 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
692 }
693 
694 long
fuiword(user_addr_t addr)695 fuiword(user_addr_t addr)
696 {
697 	long word = 0;
698 
699 	if (copyin(addr, (void *) &word, sizeof(int))) {
700 		return -1;
701 	}
702 	return word;
703 }
704 
705 /*
706  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
707  * fetching and setting of process-sized size_t and pointer values.
708  */
709 int
sulong(user_addr_t addr,int64_t word)710 sulong(user_addr_t addr, int64_t word)
711 {
712 	if (IS_64BIT_PROCESS(current_proc())) {
713 		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
714 	} else {
715 		return suiword(addr, (long)word);
716 	}
717 }
718 
719 int64_t
fulong(user_addr_t addr)720 fulong(user_addr_t addr)
721 {
722 	int64_t longword;
723 
724 	if (IS_64BIT_PROCESS(current_proc())) {
725 		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
726 			return -1;
727 		}
728 		return longword;
729 	} else {
730 		return (int64_t)fuiword(addr);
731 	}
732 }
733 
734 int
suulong(user_addr_t addr,uint64_t uword)735 suulong(user_addr_t addr, uint64_t uword)
736 {
737 	if (IS_64BIT_PROCESS(current_proc())) {
738 		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
739 	} else {
740 		return suiword(addr, (uint32_t)uword);
741 	}
742 }
743 
744 uint64_t
fuulong(user_addr_t addr)745 fuulong(user_addr_t addr)
746 {
747 	uint64_t ulongword;
748 
749 	if (IS_64BIT_PROCESS(current_proc())) {
750 		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
751 			return -1ULL;
752 		}
753 		return ulongword;
754 	} else {
755 		return (uint64_t)fuiword(addr);
756 	}
757 }
758 
759 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)760 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
761 {
762 	return ENOTSUP;
763 }
764 
765 /*
766  * pid_for_task
767  *
768  * Find the BSD process ID for the Mach task associated with the given Mach port
769  * name
770  *
771  * Parameters:	args		User argument descriptor (see below)
772  *
773  * Indirect parameters:	args->t		Mach port name
774  *                      args->pid	Process ID (returned value; see below)
775  *
776  * Returns:	KERL_SUCCESS	Success
777  *              KERN_FAILURE	Not success
778  *
779  * Implicit returns: args->pid		Process ID
780  *
781  */
782 kern_return_t
pid_for_task(struct pid_for_task_args * args)783 pid_for_task(
784 	struct pid_for_task_args *args)
785 {
786 	mach_port_name_t        t = args->t;
787 	user_addr_t             pid_addr  = args->pid;
788 	proc_t p;
789 	task_t          t1;
790 	int     pid = -1;
791 	kern_return_t   err = KERN_SUCCESS;
792 
793 	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
794 	AUDIT_ARG(mach_port1, t);
795 
796 	t1 = port_name_to_task_name(t);
797 
798 	if (t1 == TASK_NULL) {
799 		err = KERN_FAILURE;
800 		goto pftout;
801 	} else {
802 		p = get_bsdtask_info(t1);
803 		if (p) {
804 			pid  = proc_pid(p);
805 			err = KERN_SUCCESS;
806 		} else if (task_is_a_corpse(t1)) {
807 			pid = task_pid(t1);
808 			err = KERN_SUCCESS;
809 		} else {
810 			err = KERN_FAILURE;
811 		}
812 	}
813 	task_deallocate(t1);
814 pftout:
815 	AUDIT_ARG(pid, pid);
816 	(void) copyout((char *) &pid, pid_addr, sizeof(int));
817 	AUDIT_MACH_SYSCALL_EXIT(err);
818 	return err;
819 }
820 
/*
 * Policy consulted by task_for_pid_posix_check():
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
828 
829 /*
830  *	Routine:	task_for_pid_posix_check
831  *	Purpose:
832  *			Verify that the current process should be allowed to
833  *			get the target process's task port. This is only
834  *			permitted if:
835  *			- The current process is root
836  *			OR all of the following are true:
837  *			- The target process's real, effective, and saved uids
838  *			  are the same as the current proc's euid,
839  *			- The target process's group set is a subset of the
840  *			  calling process's group set, and
841  *			- The target process hasn't switched credentials.
842  *
843  *	Returns:	TRUE: permitted
844  *			FALSE: denied
845  */
846 static int
task_for_pid_posix_check(proc_t target)847 task_for_pid_posix_check(proc_t target)
848 {
849 	kauth_cred_t targetcred, mycred;
850 	bool checkcredentials;
851 	uid_t myuid;
852 	int allowed;
853 
854 	/* No task_for_pid on bad targets */
855 	if (target->p_stat == SZOMB) {
856 		return FALSE;
857 	}
858 
859 	mycred = kauth_cred_get();
860 	myuid = kauth_cred_getuid(mycred);
861 
862 	/* If we're running as root, the check passes */
863 	if (kauth_cred_issuser(mycred)) {
864 		return TRUE;
865 	}
866 
867 	/* We're allowed to get our own task port */
868 	if (target == current_proc()) {
869 		return TRUE;
870 	}
871 
872 	/*
873 	 * Under DENY, only root can get another proc's task port,
874 	 * so no more checks are needed.
875 	 */
876 	if (tfp_policy == KERN_TFP_POLICY_DENY) {
877 		return FALSE;
878 	}
879 
880 	targetcred = kauth_cred_proc_ref(target);
881 	allowed = TRUE;
882 
883 	checkcredentials = !proc_is_third_party_debuggable_driver(target);
884 
885 	if (checkcredentials) {
886 		/* Do target's ruid, euid, and saved uid match my euid? */
887 		if ((kauth_cred_getuid(targetcred) != myuid) ||
888 		    (kauth_cred_getruid(targetcred) != myuid) ||
889 		    (kauth_cred_getsvuid(targetcred) != myuid)) {
890 			allowed = FALSE;
891 			goto out;
892 		}
893 		/* Are target's groups a subset of my groups? */
894 		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
895 		    allowed == 0) {
896 			allowed = FALSE;
897 			goto out;
898 		}
899 	}
900 
901 	/* Has target switched credentials? */
902 	if (target->p_flag & P_SUGID) {
903 		allowed = FALSE;
904 		goto out;
905 	}
906 
907 out:
908 	kauth_cred_unref(&targetcred);
909 	return allowed;
910 }
911 
912 /*
913  *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
914  *
915  *	Description:	Waits for the user space daemon to respond to the request
916  *			we made. Function declared non inline to be visible in
917  *			stackshots and spindumps as well as debugging.
918  */
919 __attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(mach_port_t task_access_port,int32_t calling_pid,uint32_t calling_gid,int32_t target_pid,mach_task_flavor_t flavor)920 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
921 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
922 {
923 	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
924 }
925 
926 /*
927  *	Routine:	task_for_pid
928  *	Purpose:
929  *		Get the task port for another "process", named by its
930  *		process ID on the same host as "target_task".
931  *
932  *		Only permitted to privileged processes, or processes
933  *		with the same user ID.
934  *
935 	 *		Note: if pid == 0, an error is returned no matter who is calling.
936  *
937  * XXX This should be a BSD system call, not a Mach trap!!!
938  */
939 kern_return_t
task_for_pid(struct task_for_pid_args * args)940 task_for_pid(
941 	struct task_for_pid_args *args)
942 {
943 	mach_port_name_t        target_tport = args->target_tport;
944 	int                     pid = args->pid;
945 	user_addr_t             task_addr = args->t;
946 	proc_t                  p = PROC_NULL;
947 	task_t                  t1 = TASK_NULL;
948 	task_t                  task = TASK_NULL;
949 	mach_port_name_t        tret = MACH_PORT_NULL;
950 	ipc_port_t              tfpport = MACH_PORT_NULL;
951 	void                    * sright = NULL;
952 	int                     error = 0;
953 	boolean_t               is_current_proc = FALSE;
954 	struct proc_ident       pident = {0};
955 
956 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
957 	AUDIT_ARG(pid, pid);
958 	AUDIT_ARG(mach_port1, target_tport);
959 
960 	/* Always check if pid == 0 */
961 	if (pid == 0) {
962 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
963 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
964 		return KERN_FAILURE;
965 	}
966 
967 	t1 = port_name_to_task(target_tport);
968 	if (t1 == TASK_NULL) {
969 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
970 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
971 		return KERN_FAILURE;
972 	}
973 
974 
975 	p = proc_find(pid);
976 	if (p == PROC_NULL) {
977 		error = KERN_FAILURE;
978 		goto tfpout;
979 	}
980 	pident = proc_ident(p);
981 	is_current_proc = (p == current_proc());
982 
983 #if CONFIG_AUDIT
984 	AUDIT_ARG(process, p);
985 #endif
986 
987 	if (!(task_for_pid_posix_check(p))) {
988 		error = KERN_FAILURE;
989 		goto tfpout;
990 	}
991 
992 	if (proc_task(p) == TASK_NULL) {
993 		error = KERN_SUCCESS;
994 		goto tfpout;
995 	}
996 
997 	/*
998 	 * Grab a task reference and drop the proc reference as the proc ref
999 	 * shouldn't be held accross upcalls.
1000 	 */
1001 	task = proc_task(p);
1002 	task_reference(task);
1003 
1004 	proc_rele(p);
1005 	p = PROC_NULL;
1006 
1007 	/* IPC is not active on the task until after `exec_resettextvp` has been called.
1008 	 * We don't want to call into MAC hooks until we know that this has occured, otherwise
1009 	 * AMFI and others will read uninitialized fields from the csproc
1010 	 */
1011 	if (!task_is_ipc_active(task)) {
1012 		error = KERN_FAILURE;
1013 		goto tfpout;
1014 	}
1015 
1016 #if CONFIG_MACF
1017 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1018 	if (error) {
1019 		error = KERN_FAILURE;
1020 		goto tfpout;
1021 	}
1022 #endif
1023 
1024 	/* If we aren't root and target's task access port is set... */
1025 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1026 	    !is_current_proc &&
1027 	    (task_get_task_access_port(task, &tfpport) == 0) &&
1028 	    (tfpport != IPC_PORT_NULL)) {
1029 		if (tfpport == IPC_PORT_DEAD) {
1030 			error = KERN_PROTECTION_FAILURE;
1031 			goto tfpout;
1032 		}
1033 
1034 		/* Call up to the task access server */
1035 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1036 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1037 
1038 		if (error != MACH_MSG_SUCCESS) {
1039 			if (error == MACH_RCV_INTERRUPTED) {
1040 				error = KERN_ABORTED;
1041 			} else {
1042 				error = KERN_FAILURE;
1043 			}
1044 			goto tfpout;
1045 		}
1046 	}
1047 
1048 	/* Grant task port access */
1049 	extmod_statistics_incr_task_for_pid(task);
1050 
1051 	/* this reference will be consumed during conversion */
1052 	task_reference(task);
1053 	if (task == current_task()) {
1054 		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
1055 		sright = (void *)convert_task_to_port_pinned(task);
1056 	} else {
1057 		sright = (void *)convert_task_to_port(task);
1058 	}
1059 	/* extra task ref consumed */
1060 
1061 	/*
1062 	 * Check if the task has been corpsified. We must do so after conversion
1063 	 * since we don't hold locks and may have grabbed a corpse control port
1064 	 * above which will prevent no-senders notification delivery.
1065 	 */
1066 	if (task_is_a_corpse(task)) {
1067 		ipc_port_release_send(sright);
1068 		error = KERN_FAILURE;
1069 		goto tfpout;
1070 	}
1071 
1072 	tret = ipc_port_copyout_send(
1073 		sright,
1074 		get_task_ipcspace(current_task()));
1075 
1076 	error = KERN_SUCCESS;
1077 
1078 tfpout:
1079 	task_deallocate(t1);
1080 	AUDIT_ARG(mach_port2, tret);
1081 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1082 
1083 	if (tfpport != IPC_PORT_NULL) {
1084 		ipc_port_release_send(tfpport);
1085 	}
1086 	if (task != TASK_NULL) {
1087 		task_deallocate(task);
1088 	}
1089 	if (p != PROC_NULL) {
1090 		proc_rele(p);
1091 	}
1092 	AUDIT_MACH_SYSCALL_EXIT(error);
1093 	return error;
1094 }
1095 
1096 /*
1097  *	Routine:	task_name_for_pid
1098  *	Purpose:
1099  *		Get the task name port for another "process", named by its
1100  *		process ID on the same host as "target_task".
1101  *
1102  *		Only permitted to privileged processes, or processes
1103  *		with the same user ID.
1104  *
1105  * XXX This should be a BSD system call, not a Mach trap!!!
1106  */
1107 
1108 kern_return_t
task_name_for_pid(struct task_name_for_pid_args * args)1109 task_name_for_pid(
1110 	struct task_name_for_pid_args *args)
1111 {
1112 	mach_port_name_t        target_tport = args->target_tport;
1113 	int                     pid = args->pid;
1114 	user_addr_t             task_addr = args->t;
1115 	proc_t                  p = PROC_NULL;
1116 	task_t                  t1 = TASK_NULL;
1117 	mach_port_name_t        tret = MACH_PORT_NULL;
1118 	void * sright;
1119 	int error = 0, refheld = 0;
1120 	kauth_cred_t target_cred;
1121 
1122 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
1123 	AUDIT_ARG(pid, pid);
1124 	AUDIT_ARG(mach_port1, target_tport);
1125 
1126 	t1 = port_name_to_task(target_tport);
1127 	if (t1 == TASK_NULL) {
1128 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1129 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1130 		return KERN_FAILURE;
1131 	}
1132 
1133 	p = proc_find(pid);
1134 	if (p != PROC_NULL) {
1135 		AUDIT_ARG(process, p);
1136 		target_cred = kauth_cred_proc_ref(p);
1137 		refheld = 1;
1138 
1139 		if ((p->p_stat != SZOMB)
1140 		    && ((current_proc() == p)
1141 		    || kauth_cred_issuser(kauth_cred_get())
1142 		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
1143 		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
1144 		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
1145 		    )) {
1146 			if (proc_task(p) != TASK_NULL) {
1147 				struct proc_ident pident = proc_ident(p);
1148 
1149 				task_t task = proc_task(p);
1150 
1151 				task_reference(task);
1152 				proc_rele(p);
1153 				p = PROC_NULL;
1154 #if CONFIG_MACF
1155 				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
1156 				if (error) {
1157 					task_deallocate(task);
1158 					goto noperm;
1159 				}
1160 #endif
1161 				sright = (void *)convert_task_name_to_port(task);
1162 				task = NULL;
1163 				tret = ipc_port_copyout_send(sright,
1164 				    get_task_ipcspace(current_task()));
1165 			} else {
1166 				tret  = MACH_PORT_NULL;
1167 			}
1168 
1169 			AUDIT_ARG(mach_port2, tret);
1170 			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1171 			task_deallocate(t1);
1172 			error = KERN_SUCCESS;
1173 			goto tnfpout;
1174 		}
1175 	}
1176 
1177 #if CONFIG_MACF
1178 noperm:
1179 #endif
1180 	task_deallocate(t1);
1181 	tret = MACH_PORT_NULL;
1182 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1183 	error = KERN_FAILURE;
1184 tnfpout:
1185 	if (refheld != 0) {
1186 		kauth_cred_unref(&target_cred);
1187 	}
1188 	if (p != PROC_NULL) {
1189 		proc_rele(p);
1190 	}
1191 	AUDIT_MACH_SYSCALL_EXIT(error);
1192 	return error;
1193 }
1194 
1195 /*
1196  *	Routine:	task_inspect_for_pid
1197  *	Purpose:
1198  *		Get the task inspect port for another "process", named by its
1199  *		process ID on the same host as "target_task".
1200  */
1201 int
task_inspect_for_pid(struct proc * p __unused,struct task_inspect_for_pid_args * args,int * ret)1202 task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
1203 {
1204 	mach_port_name_t        target_tport = args->target_tport;
1205 	int                     pid = args->pid;
1206 	user_addr_t             task_addr = args->t;
1207 
1208 	proc_t                  proc = PROC_NULL;
1209 	task_t                  t1 = TASK_NULL;
1210 	task_inspect_t          task_insp = TASK_INSPECT_NULL;
1211 	mach_port_name_t        tret = MACH_PORT_NULL;
1212 	ipc_port_t              tfpport = MACH_PORT_NULL;
1213 	int                     error = 0;
1214 	void                    *sright = NULL;
1215 	boolean_t               is_current_proc = FALSE;
1216 	struct proc_ident       pident = {0};
1217 
1218 	/* Disallow inspect port for kernel_task */
1219 	if (pid == 0) {
1220 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1221 		return EPERM;
1222 	}
1223 
1224 	t1 = port_name_to_task(target_tport);
1225 	if (t1 == TASK_NULL) {
1226 		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1227 		return EINVAL;
1228 	}
1229 
1230 	proc = proc_find(pid);
1231 	if (proc == PROC_NULL) {
1232 		error = ESRCH;
1233 		goto tifpout;
1234 	}
1235 	pident = proc_ident(proc);
1236 	is_current_proc = (proc == current_proc());
1237 
1238 	if (!(task_for_pid_posix_check(proc))) {
1239 		error = EPERM;
1240 		goto tifpout;
1241 	}
1242 
1243 	task_insp = proc_task(proc);
1244 	if (task_insp == TASK_INSPECT_NULL) {
1245 		goto tifpout;
1246 	}
1247 
1248 	/*
1249 	 * Grab a task reference and drop the proc reference before making any upcalls.
1250 	 */
1251 	task_reference(task_insp);
1252 
1253 	proc_rele(proc);
1254 	proc = PROC_NULL;
1255 
1256 #if CONFIG_MACF
1257 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
1258 	if (error) {
1259 		error = EPERM;
1260 		goto tifpout;
1261 	}
1262 #endif
1263 
1264 	/* If we aren't root and target's task access port is set... */
1265 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1266 	    !is_current_proc &&
1267 	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
1268 	    (tfpport != IPC_PORT_NULL)) {
1269 		if (tfpport == IPC_PORT_DEAD) {
1270 			error = EACCES;
1271 			goto tifpout;
1272 		}
1273 
1274 
1275 		/* Call up to the task access server */
1276 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1277 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
1278 
1279 		if (error != MACH_MSG_SUCCESS) {
1280 			if (error == MACH_RCV_INTERRUPTED) {
1281 				error = EINTR;
1282 			} else {
1283 				error = EPERM;
1284 			}
1285 			goto tifpout;
1286 		}
1287 	}
1288 
1289 	/* Check if the task has been corpsified */
1290 	if (task_is_a_corpse(task_insp)) {
1291 		error = EACCES;
1292 		goto tifpout;
1293 	}
1294 
1295 	/* could be IP_NULL, consumes a ref */
1296 	sright = (void*) convert_task_inspect_to_port(task_insp);
1297 	task_insp = TASK_INSPECT_NULL;
1298 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1299 
1300 tifpout:
1301 	task_deallocate(t1);
1302 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1303 	if (proc != PROC_NULL) {
1304 		proc_rele(proc);
1305 	}
1306 	if (tfpport != IPC_PORT_NULL) {
1307 		ipc_port_release_send(tfpport);
1308 	}
1309 	if (task_insp != TASK_INSPECT_NULL) {
1310 		task_deallocate(task_insp);
1311 	}
1312 
1313 	*ret = error;
1314 	return error;
1315 }
1316 
1317 /*
1318  *	Routine:	task_read_for_pid
1319  *	Purpose:
1320  *		Get the task read port for another "process", named by its
1321  *		process ID on the same host as "target_task".
1322  */
1323 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1324 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1325 {
1326 	mach_port_name_t        target_tport = args->target_tport;
1327 	int                     pid = args->pid;
1328 	user_addr_t             task_addr = args->t;
1329 
1330 	proc_t                  proc = PROC_NULL;
1331 	task_t                  t1 = TASK_NULL;
1332 	task_read_t             task_read = TASK_READ_NULL;
1333 	mach_port_name_t        tret = MACH_PORT_NULL;
1334 	ipc_port_t              tfpport = MACH_PORT_NULL;
1335 	int                     error = 0;
1336 	void                    *sright = NULL;
1337 	boolean_t               is_current_proc = FALSE;
1338 	struct proc_ident       pident = {0};
1339 
1340 	/* Disallow read port for kernel_task */
1341 	if (pid == 0) {
1342 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1343 		return EPERM;
1344 	}
1345 
1346 	t1 = port_name_to_task(target_tport);
1347 	if (t1 == TASK_NULL) {
1348 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1349 		return EINVAL;
1350 	}
1351 
1352 	proc = proc_find(pid);
1353 	if (proc == PROC_NULL) {
1354 		error = ESRCH;
1355 		goto trfpout;
1356 	}
1357 	pident = proc_ident(proc);
1358 	is_current_proc = (proc == current_proc());
1359 
1360 	if (!(task_for_pid_posix_check(proc))) {
1361 		error = EPERM;
1362 		goto trfpout;
1363 	}
1364 
1365 	task_read = proc_task(proc);
1366 	if (task_read == TASK_INSPECT_NULL) {
1367 		goto trfpout;
1368 	}
1369 
1370 	/*
1371 	 * Grab a task reference and drop the proc reference before making any upcalls.
1372 	 */
1373 	task_reference(task_read);
1374 
1375 	proc_rele(proc);
1376 	proc = PROC_NULL;
1377 
1378 #if CONFIG_MACF
1379 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1380 	if (error) {
1381 		error = EPERM;
1382 		goto trfpout;
1383 	}
1384 #endif
1385 
1386 	/* If we aren't root and target's task access port is set... */
1387 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1388 	    !is_current_proc &&
1389 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
1390 	    (tfpport != IPC_PORT_NULL)) {
1391 		if (tfpport == IPC_PORT_DEAD) {
1392 			error = EACCES;
1393 			goto trfpout;
1394 		}
1395 
1396 
1397 		/* Call up to the task access server */
1398 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1399 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1400 
1401 		if (error != MACH_MSG_SUCCESS) {
1402 			if (error == MACH_RCV_INTERRUPTED) {
1403 				error = EINTR;
1404 			} else {
1405 				error = EPERM;
1406 			}
1407 			goto trfpout;
1408 		}
1409 	}
1410 
1411 	/* Check if the task has been corpsified */
1412 	if (task_is_a_corpse(task_read)) {
1413 		error = EACCES;
1414 		goto trfpout;
1415 	}
1416 
1417 	/* could be IP_NULL, consumes a ref */
1418 	sright = (void*) convert_task_read_to_port(task_read);
1419 	task_read = TASK_READ_NULL;
1420 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1421 
1422 trfpout:
1423 	task_deallocate(t1);
1424 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1425 	if (proc != PROC_NULL) {
1426 		proc_rele(proc);
1427 	}
1428 	if (tfpport != IPC_PORT_NULL) {
1429 		ipc_port_release_send(tfpport);
1430 	}
1431 	if (task_read != TASK_READ_NULL) {
1432 		task_deallocate(task_read);
1433 	}
1434 
1435 	*ret = error;
1436 	return error;
1437 }
1438 
1439 kern_return_t
pid_suspend(struct proc * p __unused,struct pid_suspend_args * args,int * ret)1440 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1441 {
1442 	task_t  target = NULL;
1443 	proc_t  targetproc = PROC_NULL;
1444 	int     pid = args->pid;
1445 	int     error = 0;
1446 	mach_port_t tfpport = MACH_PORT_NULL;
1447 
1448 	if (pid == 0) {
1449 		error = EPERM;
1450 		goto out;
1451 	}
1452 
1453 	targetproc = proc_find(pid);
1454 	if (targetproc == PROC_NULL) {
1455 		error = ESRCH;
1456 		goto out;
1457 	}
1458 
1459 	if (!task_for_pid_posix_check(targetproc) &&
1460 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1461 		error = EPERM;
1462 		goto out;
1463 	}
1464 
1465 #if CONFIG_MACF
1466 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
1467 	if (error) {
1468 		error = EPERM;
1469 		goto out;
1470 	}
1471 #endif
1472 
1473 	target = proc_task(targetproc);
1474 #if XNU_TARGET_OS_OSX
1475 	if (target != TASK_NULL) {
1476 		/* If we aren't root and target's task access port is set... */
1477 		if (!kauth_cred_issuser(kauth_cred_get()) &&
1478 		    targetproc != current_proc() &&
1479 		    (task_get_task_access_port(target, &tfpport) == 0) &&
1480 		    (tfpport != IPC_PORT_NULL)) {
1481 			if (tfpport == IPC_PORT_DEAD) {
1482 				error = EACCES;
1483 				goto out;
1484 			}
1485 
1486 			/* Call up to the task access server */
1487 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1488 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1489 
1490 			if (error != MACH_MSG_SUCCESS) {
1491 				if (error == MACH_RCV_INTERRUPTED) {
1492 					error = EINTR;
1493 				} else {
1494 					error = EPERM;
1495 				}
1496 				goto out;
1497 			}
1498 		}
1499 	}
1500 #endif /* XNU_TARGET_OS_OSX */
1501 
1502 	task_reference(target);
1503 	error = task_pidsuspend(target);
1504 	if (error) {
1505 		if (error == KERN_INVALID_ARGUMENT) {
1506 			error = EINVAL;
1507 		} else {
1508 			error = EPERM;
1509 		}
1510 	}
1511 #if CONFIG_MEMORYSTATUS
1512 	else {
1513 		memorystatus_on_suspend(targetproc);
1514 	}
1515 #endif
1516 
1517 	task_deallocate(target);
1518 
1519 out:
1520 	if (tfpport != IPC_PORT_NULL) {
1521 		ipc_port_release_send(tfpport);
1522 	}
1523 
1524 	if (targetproc != PROC_NULL) {
1525 		proc_rele(targetproc);
1526 	}
1527 	*ret = error;
1528 	return error;
1529 }
1530 
1531 kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args * args)1532 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1533 {
1534 	mach_port_name_t        target_tport = args->target_tport;
1535 	int                     pid = args->pid;
1536 	user_addr_t             task_addr = args->t;
1537 	proc_t                  p = PROC_NULL;
1538 	task_t                  t1 = TASK_NULL;
1539 	task_t                  task = TASK_NULL;
1540 	mach_port_name_t        tret = MACH_PORT_NULL;
1541 	ipc_port_t              tfpport = MACH_PORT_NULL;
1542 	ipc_port_t              sright = NULL;
1543 	int                     error = 0;
1544 	boolean_t               is_current_proc = FALSE;
1545 	struct proc_ident       pident = {0};
1546 
1547 	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1548 	AUDIT_ARG(pid, pid);
1549 	AUDIT_ARG(mach_port1, target_tport);
1550 
1551 	/* Always check if pid == 0 */
1552 	if (pid == 0) {
1553 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1554 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1555 		return KERN_FAILURE;
1556 	}
1557 
1558 	t1 = port_name_to_task(target_tport);
1559 	if (t1 == TASK_NULL) {
1560 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1561 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1562 		return KERN_FAILURE;
1563 	}
1564 
1565 	p = proc_find(pid);
1566 	if (p == PROC_NULL) {
1567 		error = KERN_FAILURE;
1568 		goto tfpout;
1569 	}
1570 	pident = proc_ident(p);
1571 	is_current_proc = (p == current_proc());
1572 
1573 #if CONFIG_AUDIT
1574 	AUDIT_ARG(process, p);
1575 #endif
1576 
1577 	if (!(task_for_pid_posix_check(p))) {
1578 		error = KERN_FAILURE;
1579 		goto tfpout;
1580 	}
1581 
1582 	if (proc_task(p) == TASK_NULL) {
1583 		error = KERN_SUCCESS;
1584 		goto tfpout;
1585 	}
1586 
1587 	/*
1588 	 * Grab a task reference and drop the proc reference before making any upcalls.
1589 	 */
1590 	task = proc_task(p);
1591 	task_reference(task);
1592 
1593 	proc_rele(p);
1594 	p = PROC_NULL;
1595 
1596 	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
1597 #if CONFIG_MACF
1598 		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1599 		if (error) {
1600 			error = KERN_FAILURE;
1601 			goto tfpout;
1602 		}
1603 #endif
1604 
1605 		/* If we aren't root and target's task access port is set... */
1606 		if (!kauth_cred_issuser(kauth_cred_get()) &&
1607 		    !is_current_proc &&
1608 		    (task_get_task_access_port(task, &tfpport) == 0) &&
1609 		    (tfpport != IPC_PORT_NULL)) {
1610 			if (tfpport == IPC_PORT_DEAD) {
1611 				error = KERN_PROTECTION_FAILURE;
1612 				goto tfpout;
1613 			}
1614 
1615 
1616 			/* Call up to the task access server */
1617 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1618 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1619 
1620 			if (error != MACH_MSG_SUCCESS) {
1621 				if (error == MACH_RCV_INTERRUPTED) {
1622 					error = KERN_ABORTED;
1623 				} else {
1624 					error = KERN_FAILURE;
1625 				}
1626 				goto tfpout;
1627 			}
1628 		}
1629 	}
1630 
1631 	/* Check if the task has been corpsified */
1632 	if (task_is_a_corpse(task)) {
1633 		error = KERN_FAILURE;
1634 		goto tfpout;
1635 	}
1636 
1637 	error = task_get_debug_control_port(task, &sright);
1638 	if (error != KERN_SUCCESS) {
1639 		goto tfpout;
1640 	}
1641 
1642 	tret = ipc_port_copyout_send(
1643 		sright,
1644 		get_task_ipcspace(current_task()));
1645 
1646 	error = KERN_SUCCESS;
1647 
1648 tfpout:
1649 	task_deallocate(t1);
1650 	AUDIT_ARG(mach_port2, tret);
1651 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1652 
1653 	if (tfpport != IPC_PORT_NULL) {
1654 		ipc_port_release_send(tfpport);
1655 	}
1656 	if (task != TASK_NULL) {
1657 		task_deallocate(task);
1658 	}
1659 	if (p != PROC_NULL) {
1660 		proc_rele(p);
1661 	}
1662 	AUDIT_MACH_SYSCALL_EXIT(error);
1663 	return error;
1664 }
1665 
1666 kern_return_t
pid_resume(struct proc * p __unused,struct pid_resume_args * args,int * ret)1667 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1668 {
1669 	task_t  target = NULL;
1670 	proc_t  targetproc = PROC_NULL;
1671 	int     pid = args->pid;
1672 	int     error = 0;
1673 	mach_port_t tfpport = MACH_PORT_NULL;
1674 
1675 	if (pid == 0) {
1676 		error = EPERM;
1677 		goto out;
1678 	}
1679 
1680 	targetproc = proc_find(pid);
1681 	if (targetproc == PROC_NULL) {
1682 		error = ESRCH;
1683 		goto out;
1684 	}
1685 
1686 	if (!task_for_pid_posix_check(targetproc) &&
1687 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1688 		error = EPERM;
1689 		goto out;
1690 	}
1691 
1692 #if CONFIG_MACF
1693 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
1694 	if (error) {
1695 		error = EPERM;
1696 		goto out;
1697 	}
1698 #endif
1699 
1700 	target = proc_task(targetproc);
1701 #if XNU_TARGET_OS_OSX
1702 	if (target != TASK_NULL) {
1703 		/* If we aren't root and target's task access port is set... */
1704 		if (!kauth_cred_issuser(kauth_cred_get()) &&
1705 		    targetproc != current_proc() &&
1706 		    (task_get_task_access_port(target, &tfpport) == 0) &&
1707 		    (tfpport != IPC_PORT_NULL)) {
1708 			if (tfpport == IPC_PORT_DEAD) {
1709 				error = EACCES;
1710 				goto out;
1711 			}
1712 
1713 			/* Call up to the task access server */
1714 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1715 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1716 
1717 			if (error != MACH_MSG_SUCCESS) {
1718 				if (error == MACH_RCV_INTERRUPTED) {
1719 					error = EINTR;
1720 				} else {
1721 					error = EPERM;
1722 				}
1723 				goto out;
1724 			}
1725 		}
1726 	}
1727 #endif /* XNU_TARGET_OS_OSX */
1728 
1729 #if !XNU_TARGET_OS_OSX
1730 #if SOCKETS
1731 	resume_proc_sockets(targetproc);
1732 #endif /* SOCKETS */
1733 #endif /* !XNU_TARGET_OS_OSX */
1734 
1735 	task_reference(target);
1736 
1737 #if CONFIG_MEMORYSTATUS
1738 	memorystatus_on_resume(targetproc);
1739 #endif
1740 
1741 	error = task_pidresume(target);
1742 	if (error) {
1743 		if (error == KERN_INVALID_ARGUMENT) {
1744 			error = EINVAL;
1745 		} else {
1746 			if (error == KERN_MEMORY_ERROR) {
1747 				psignal(targetproc, SIGKILL);
1748 				error = EIO;
1749 			} else {
1750 				error = EPERM;
1751 			}
1752 		}
1753 	}
1754 
1755 	task_deallocate(target);
1756 
1757 out:
1758 	if (tfpport != IPC_PORT_NULL) {
1759 		ipc_port_release_send(tfpport);
1760 	}
1761 
1762 	if (targetproc != PROC_NULL) {
1763 		proc_rele(targetproc);
1764 	}
1765 
1766 	*ret = error;
1767 	return error;
1768 }
1769 
1770 #if !XNU_TARGET_OS_OSX
1771 /*
1772  * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1773  * When a process is specified, this call is blocking, otherwise we wake up the
1774  * freezer thread and do not block on a process being frozen.
1775  */
1776 kern_return_t
pid_hibernate(struct proc * p __unused,struct pid_hibernate_args * args,int * ret)1777 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1778 {
1779 	int     error = 0;
1780 	proc_t  targetproc = PROC_NULL;
1781 	int     pid = args->pid;
1782 
1783 #ifndef CONFIG_FREEZE
1784 	#pragma unused(pid)
1785 #else
1786 
1787 	/*
1788 	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1789 	 */
1790 
1791 	if (pid >= 0) {
1792 		targetproc = proc_find(pid);
1793 
1794 		if (targetproc == PROC_NULL) {
1795 			error = ESRCH;
1796 			goto out;
1797 		}
1798 
1799 		if (!task_for_pid_posix_check(targetproc)) {
1800 			error = EPERM;
1801 			goto out;
1802 		}
1803 	}
1804 
1805 #if CONFIG_MACF
1806 	//Note that targetproc may be null
1807 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
1808 	if (error) {
1809 		error = EPERM;
1810 		goto out;
1811 	}
1812 #endif
1813 
1814 	if (pid == -2) {
1815 		vm_pageout_anonymous_pages();
1816 	} else if (pid == -1) {
1817 		memorystatus_on_inactivity(targetproc);
1818 	} else {
1819 		error = memorystatus_freeze_process_sync(targetproc);
1820 	}
1821 
1822 out:
1823 
1824 #endif /* CONFIG_FREEZE */
1825 
1826 	if (targetproc != PROC_NULL) {
1827 		proc_rele(targetproc);
1828 	}
1829 	*ret = error;
1830 	return error;
1831 }
1832 #endif /* !XNU_TARGET_OS_OSX */
1833 
1834 #if SOCKETS
1835 int
networking_memstatus_callout(proc_t p,uint32_t status)1836 networking_memstatus_callout(proc_t p, uint32_t status)
1837 {
1838 	struct fileproc *fp;
1839 
1840 	/*
1841 	 * proc list lock NOT held
1842 	 * proc lock NOT held
1843 	 * a reference on the proc has been held / shall be dropped by the caller.
1844 	 */
1845 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1846 	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1847 
1848 	proc_fdlock(p);
1849 
1850 	fdt_foreach(fp, p) {
1851 		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1852 #if NECP
1853 		case DTYPE_NETPOLICY:
1854 			necp_fd_memstatus(p, status,
1855 			    (struct necp_fd_data *)fp_get_data(fp));
1856 			break;
1857 #endif /* NECP */
1858 #if SKYWALK
1859 		case DTYPE_CHANNEL:
1860 			kern_channel_memstatus(p, status,
1861 			    (struct kern_channel *)fp_get_data(fp));
1862 			break;
1863 #endif /* SKYWALK */
1864 		default:
1865 			break;
1866 		}
1867 	}
1868 	proc_fdunlock(p);
1869 
1870 	return 1;
1871 }
1872 
1873 #if SKYWALK
1874 /*
1875  * Since we make multiple passes across the fileproc array, record the
1876  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
1877  * large enough to accomodate most, if not all cases.  If we find more,
1878  * we'll go to the slow path during second pass.
1879  */
1880 #define MAX_CHANNELS    8       /* should be more than enough */
1881 #endif /* SKYWALK */
1882 
1883 static int
networking_defunct_callout(proc_t p,void * arg)1884 networking_defunct_callout(proc_t p, void *arg)
1885 {
1886 	struct pid_shutdown_sockets_args *args = arg;
1887 	int pid = args->pid;
1888 	int level = args->level;
1889 	struct fileproc *fp;
1890 #if SKYWALK
1891 	int i;
1892 	int channel_count = 0;
1893 	struct kern_channel *channel_array[MAX_CHANNELS];
1894 
1895 	bzero(&channel_array, sizeof(channel_array));
1896 #endif /* SKYWALK */
1897 
1898 	proc_fdlock(p);
1899 
1900 	fdt_foreach(fp, p) {
1901 		struct fileglob *fg = fp->fp_glob;
1902 
1903 		switch (FILEGLOB_DTYPE(fg)) {
1904 		case DTYPE_SOCKET: {
1905 			struct socket *so = (struct socket *)fg_get_data(fg);
1906 			if (proc_getpid(p) == pid || so->last_pid == pid ||
1907 			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
1908 				/* Call networking stack with socket and level */
1909 				(void)socket_defunct(p, so, level);
1910 			}
1911 			break;
1912 		}
1913 #if NECP
1914 		case DTYPE_NETPOLICY:
1915 			/* first pass: defunct necp and get stats for ntstat */
1916 			if (proc_getpid(p) == pid) {
1917 				necp_fd_defunct(p,
1918 				    (struct necp_fd_data *)fg_get_data(fg));
1919 			}
1920 			break;
1921 #endif /* NECP */
1922 #if SKYWALK
1923 		case DTYPE_CHANNEL:
1924 			/* first pass: get channels and total count */
1925 			if (proc_getpid(p) == pid) {
1926 				if (channel_count < MAX_CHANNELS) {
1927 					channel_array[channel_count] =
1928 					    (struct kern_channel *)fg_get_data(fg);
1929 				}
1930 				++channel_count;
1931 			}
1932 			break;
1933 #endif /* SKYWALK */
1934 		default:
1935 			break;
1936 		}
1937 	}
1938 
1939 #if SKYWALK
1940 	/*
1941 	 * Second pass: defunct channels/flows (after NECP).  Handle
1942 	 * the common case of up to MAX_CHANNELS count with fast path,
1943 	 * and traverse the fileproc array again only if we exceed it.
1944 	 */
1945 	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
1946 		ASSERT(proc_getpid(p) == pid);
1947 		for (i = 0; i < channel_count; i++) {
1948 			ASSERT(channel_array[i] != NULL);
1949 			kern_channel_defunct(p, channel_array[i]);
1950 		}
1951 	} else if (channel_count != 0) {
1952 		ASSERT(proc_getpid(p) == pid);
1953 		fdt_foreach(fp, p) {
1954 			struct fileglob *fg = fp->fp_glob;
1955 
1956 			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
1957 				kern_channel_defunct(p,
1958 				    (struct kern_channel *)fg_get_data(fg));
1959 			}
1960 		}
1961 	}
1962 #endif /* SKYWALK */
1963 	proc_fdunlock(p);
1964 
1965 	return PROC_RETURNED;
1966 }
1967 
1968 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1969 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1970 {
1971 	int                             error = 0;
1972 	proc_t                          targetproc = PROC_NULL;
1973 	int                             pid = args->pid;
1974 	int                             level = args->level;
1975 
1976 	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1977 	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1978 		error = EINVAL;
1979 		goto out;
1980 	}
1981 
1982 	targetproc = proc_find(pid);
1983 	if (targetproc == PROC_NULL) {
1984 		error = ESRCH;
1985 		goto out;
1986 	}
1987 
1988 	if (!task_for_pid_posix_check(targetproc) &&
1989 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1990 		error = EPERM;
1991 		goto out;
1992 	}
1993 
1994 #if CONFIG_MACF
1995 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1996 	if (error) {
1997 		error = EPERM;
1998 		goto out;
1999 	}
2000 #endif
2001 
2002 	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
2003 	    networking_defunct_callout, args, NULL, NULL);
2004 
2005 out:
2006 	if (targetproc != PROC_NULL) {
2007 		proc_rele(targetproc);
2008 	}
2009 	*ret = error;
2010 	return error;
2011 }
2012 
2013 #endif /* SOCKETS */
2014 
2015 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)2016 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
2017     __unused int arg2, struct sysctl_req *req)
2018 {
2019 	int error = 0;
2020 	int new_value;
2021 
2022 	error = SYSCTL_OUT(req, arg1, sizeof(int));
2023 	if (error || req->newptr == USER_ADDR_NULL) {
2024 		return error;
2025 	}
2026 
2027 	if (!kauth_cred_issuser(kauth_cred_get())) {
2028 		return EPERM;
2029 	}
2030 
2031 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2032 		goto out;
2033 	}
2034 	if ((new_value == KERN_TFP_POLICY_DENY)
2035 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2036 		tfp_policy = new_value;
2037 	} else {
2038 		error = EINVAL;
2039 	}
2040 out:
2041 	return error;
2042 }
2043 
2044 #if defined(SECURE_KERNEL)
2045 static int kern_secure_kernel = 1;
2046 #else
2047 static int kern_secure_kernel = 0;
2048 #endif
2049 
2050 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
2051 
2052 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
2053 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2054     &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
2055 
2056 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
2057     &shared_region_trace_level, 0, "");
2058 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
2059     &shared_region_version, 0, "");
2060 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
2061     &shared_region_persistence, 0, "");
2062 
2063 /*
2064  * shared_region_check_np:
2065  *
2066  * This system call is intended for dyld.
2067  *
2068  * dyld calls this when any process starts to see if the process's shared
2069  * region is already set up and ready to use.
2070  * This call returns the base address of the first mapping in the
2071  * process's shared region's first mapping.
2072  * dyld will then check what's mapped at that address.
2073  *
2074  * If the shared region is empty, dyld will then attempt to map the shared
2075  * cache file in the shared region via the shared_region_map_np() system call.
2076  *
2077  * If something's already mapped in the shared region, dyld will check if it
2078  * matches the shared cache it would like to use for that process.
2079  * If it matches, evrything's ready and the process can proceed and use the
2080  * shared region.
2081  * If it doesn't match, dyld will unmap the shared region and map the shared
2082  * cache into the process's address space via mmap().
2083  *
2084  * A NULL pointer argument can be used by dyld to indicate it has unmapped
2085  * the shared region. We will remove the shared_region reference from the task.
2086  *
2087  * ERROR VALUES
2088  * EINVAL	no shared region
2089  * ENOMEM	shared region is empty
2090  * EFAULT	bad address for "start_address"
2091  */
2092 int
shared_region_check_np(__unused struct proc * p,struct shared_region_check_np_args * uap,__unused int * retvalp)2093 shared_region_check_np(
2094 	__unused struct proc                    *p,
2095 	struct shared_region_check_np_args      *uap,
2096 	__unused int                            *retvalp)
2097 {
2098 	vm_shared_region_t      shared_region;
2099 	mach_vm_offset_t        start_address = 0;
2100 	int                     error = 0;
2101 	kern_return_t           kr;
2102 	task_t                  task = current_task();
2103 
2104 	SHARED_REGION_TRACE_DEBUG(
2105 		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
2106 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2107 		proc_getpid(p), p->p_comm,
2108 		(uint64_t)uap->start_address));
2109 
2110 	/*
2111 	 * Special value of start_address used to indicate that map_with_linking() should
2112 	 * no longer be allowed in this process
2113 	 */
2114 	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
2115 		p->p_disallow_map_with_linking = TRUE;
2116 		return 0;
2117 	}
2118 
2119 	/* retrieve the current tasks's shared region */
2120 	shared_region = vm_shared_region_get(task);
2121 	if (shared_region != NULL) {
2122 		/*
2123 		 * A NULL argument is used by dyld to indicate the task
2124 		 * has unmapped its shared region.
2125 		 */
2126 		if (uap->start_address == 0) {
2127 			/* unmap it first */
2128 			vm_shared_region_remove(task, shared_region);
2129 			vm_shared_region_set(task, NULL);
2130 		} else {
2131 			/* retrieve address of its first mapping... */
2132 			kr = vm_shared_region_start_address(shared_region, &start_address, task);
2133 			if (kr != KERN_SUCCESS) {
2134 				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2135 				    "check_np(0x%llx) "
2136 				    "vm_shared_region_start_address() failed\n",
2137 				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2138 				    proc_getpid(p), p->p_comm,
2139 				    (uint64_t)uap->start_address));
2140 				error = ENOMEM;
2141 			} else {
2142 #if __has_feature(ptrauth_calls)
2143 				/*
2144 				 * Remap any section of the shared library that
2145 				 * has authenticated pointers into private memory.
2146 				 */
2147 				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
2148 					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2149 					    "check_np(0x%llx) "
2150 					    "vm_shared_region_auth_remap() failed\n",
2151 					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2152 					    proc_getpid(p), p->p_comm,
2153 					    (uint64_t)uap->start_address));
2154 					error = ENOMEM;
2155 				}
2156 #endif /* __has_feature(ptrauth_calls) */
2157 
2158 				/* ... and give it to the caller */
2159 				if (error == 0) {
2160 					error = copyout(&start_address,
2161 					    (user_addr_t) uap->start_address,
2162 					    sizeof(start_address));
2163 					if (error != 0) {
2164 						SHARED_REGION_TRACE_ERROR(
2165 							("shared_region: %p [%d(%s)] "
2166 							"check_np(0x%llx) "
2167 							"copyout(0x%llx) error %d\n",
2168 							(void *)VM_KERNEL_ADDRPERM(current_thread()),
2169 							proc_getpid(p), p->p_comm,
2170 							(uint64_t)uap->start_address, (uint64_t)start_address,
2171 							error));
2172 					}
2173 				}
2174 			}
2175 		}
2176 		vm_shared_region_deallocate(shared_region);
2177 	} else {
2178 		/* no shared region ! */
2179 		error = EINVAL;
2180 	}
2181 
2182 	SHARED_REGION_TRACE_DEBUG(
2183 		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
2184 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2185 		proc_getpid(p), p->p_comm,
2186 		(uint64_t)uap->start_address, (uint64_t)start_address, error));
2187 
2188 	return error;
2189 }
2190 
2191 
2192 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2193 shared_region_copyin(
2194 	struct proc  *p,
2195 	user_addr_t  user_addr,
2196 	unsigned int count,
2197 	unsigned int element_size,
2198 	void         *kernel_data)
2199 {
2200 	int             error = 0;
2201 	vm_size_t       size = count * element_size;
2202 
2203 	error = copyin(user_addr, kernel_data, size);
2204 	if (error) {
2205 		SHARED_REGION_TRACE_ERROR(
2206 			("shared_region: %p [%d(%s)] map(): "
2207 			"copyin(0x%llx, %ld) failed (error=%d)\n",
2208 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2209 			proc_getpid(p), p->p_comm,
2210 			(uint64_t)user_addr, (long)size, error));
2211 	}
2212 	return error;
2213 }
2214 
2215 /*
2216  * A reasonable upper limit to prevent overflow of allocation/copyin.
2217  */
2218 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2219 
2220 /* forward declaration */
2221 __attribute__((noinline))
2222 static void shared_region_map_and_slide_cleanup(
2223 	struct proc              *p,
2224 	uint32_t                 files_count,
2225 	struct _sr_file_mappings *sr_file_mappings,
2226 	struct vm_shared_region  *shared_region);
2227 
2228 /*
2229  * Setup part of _shared_region_map_and_slide().
2230  * It had to be broken out of _shared_region_map_and_slide() to
2231  * prevent compiler inlining from blowing out the stack.
2232  */
2233 __attribute__((noinline))
2234 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode * rdir_vp)2235 shared_region_map_and_slide_setup(
2236 	struct proc                         *p,
2237 	uint32_t                            files_count,
2238 	struct shared_file_np               *files,
2239 	uint32_t                            mappings_count,
2240 	struct shared_file_mapping_slide_np *mappings,
2241 	struct _sr_file_mappings            **sr_file_mappings,
2242 	struct vm_shared_region             **shared_region_ptr,
2243 	struct vnode                        *rdir_vp)
2244 {
2245 	int                             error = 0;
2246 	struct _sr_file_mappings        *srfmp;
2247 	uint32_t                        mappings_next;
2248 	struct vnode_attr               va;
2249 	off_t                           fs;
2250 #if CONFIG_MACF
2251 	vm_prot_t                       maxprot = VM_PROT_ALL;
2252 #endif
2253 	uint32_t                        i;
2254 	struct vm_shared_region         *shared_region = NULL;
2255 	boolean_t                       is_driverkit = task_is_driver(current_task());
2256 
2257 	SHARED_REGION_TRACE_DEBUG(
2258 		("shared_region: %p [%d(%s)] -> map\n",
2259 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2260 		proc_getpid(p), p->p_comm));
2261 
2262 	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2263 		error = E2BIG;
2264 		goto done;
2265 	}
2266 	if (files_count == 0) {
2267 		error = EINVAL;
2268 		goto done;
2269 	}
2270 	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2271 	    Z_WAITOK | Z_ZERO);
2272 	if (*sr_file_mappings == NULL) {
2273 		error = ENOMEM;
2274 		goto done;
2275 	}
2276 	mappings_next = 0;
2277 	for (i = 0; i < files_count; i++) {
2278 		srfmp = &(*sr_file_mappings)[i];
2279 		srfmp->fd = files[i].sf_fd;
2280 		srfmp->mappings_count = files[i].sf_mappings_count;
2281 		srfmp->mappings = &mappings[mappings_next];
2282 		mappings_next += srfmp->mappings_count;
2283 		if (mappings_next > mappings_count) {
2284 			error = EINVAL;
2285 			goto done;
2286 		}
2287 		srfmp->slide = files[i].sf_slide;
2288 	}
2289 
2290 	/* get the process's shared region (setup in vm_map_exec()) */
2291 	shared_region = vm_shared_region_trim_and_get(current_task());
2292 	*shared_region_ptr = shared_region;
2293 	if (shared_region == NULL) {
2294 		SHARED_REGION_TRACE_ERROR(
2295 			("shared_region: %p [%d(%s)] map(): "
2296 			"no shared region\n",
2297 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2298 			proc_getpid(p), p->p_comm));
2299 		error = EINVAL;
2300 		goto done;
2301 	}
2302 
2303 	/*
2304 	 * Check the shared region matches the current root
2305 	 * directory of this process.  Deny the mapping to
2306 	 * avoid tainting the shared region with something that
2307 	 * doesn't quite belong into it.
2308 	 */
2309 	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2310 	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2311 		SHARED_REGION_TRACE_ERROR(
2312 			("shared_region: map(%p) root_dir mismatch\n",
2313 			(void *)VM_KERNEL_ADDRPERM(current_thread())));
2314 		error = EPERM;
2315 		goto done;
2316 	}
2317 
2318 
2319 	for (srfmp = &(*sr_file_mappings)[0];
2320 	    srfmp < &(*sr_file_mappings)[files_count];
2321 	    srfmp++) {
2322 		if (srfmp->mappings_count == 0) {
2323 			/* no mappings here... */
2324 			continue;
2325 		}
2326 
2327 		/*
2328 		 * A file descriptor of -1 is used to indicate that the data
2329 		 * to be put in the shared region for this mapping comes directly
2330 		 * from the processes address space. Ensure we have proper alignments.
2331 		 */
2332 		if (srfmp->fd == -1) {
2333 			/* only allow one mapping per fd */
2334 			if (srfmp->mappings_count > 1) {
2335 				SHARED_REGION_TRACE_ERROR(
2336 					("shared_region: %p [%d(%s)] map data >1 mapping\n",
2337 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2338 					proc_getpid(p), p->p_comm));
2339 				error = EINVAL;
2340 				goto done;
2341 			}
2342 
2343 			/*
2344 			 * The destination address and size must be page aligned.
2345 			 */
2346 			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2347 			mach_vm_address_t dest_addr = mapping->sms_address;
2348 			mach_vm_size_t    map_size = mapping->sms_size;
2349 			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2350 				SHARED_REGION_TRACE_ERROR(
2351 					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2352 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2353 					proc_getpid(p), p->p_comm, dest_addr));
2354 				error = EINVAL;
2355 				goto done;
2356 			}
2357 			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2358 				SHARED_REGION_TRACE_ERROR(
2359 					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2360 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2361 					proc_getpid(p), p->p_comm, map_size));
2362 				error = EINVAL;
2363 				goto done;
2364 			}
2365 			continue;
2366 		}
2367 
2368 		/* get file structure from file descriptor */
2369 		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2370 		if (error) {
2371 			SHARED_REGION_TRACE_ERROR(
2372 				("shared_region: %p [%d(%s)] map: "
2373 				"fd=%d lookup failed (error=%d)\n",
2374 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2375 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2376 			goto done;
2377 		}
2378 
2379 		/* we need at least read permission on the file */
2380 		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2381 			SHARED_REGION_TRACE_ERROR(
2382 				("shared_region: %p [%d(%s)] map: "
2383 				"fd=%d not readable\n",
2384 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2385 				proc_getpid(p), p->p_comm, srfmp->fd));
2386 			error = EPERM;
2387 			goto done;
2388 		}
2389 
2390 		/* get vnode from file structure */
2391 		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2392 		if (error) {
2393 			SHARED_REGION_TRACE_ERROR(
2394 				("shared_region: %p [%d(%s)] map: "
2395 				"fd=%d getwithref failed (error=%d)\n",
2396 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2397 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2398 			goto done;
2399 		}
2400 		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2401 
2402 		/* make sure the vnode is a regular file */
2403 		if (srfmp->vp->v_type != VREG) {
2404 			SHARED_REGION_TRACE_ERROR(
2405 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2406 				"not a file (type=%d)\n",
2407 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2408 				proc_getpid(p), p->p_comm,
2409 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2410 				srfmp->vp->v_name, srfmp->vp->v_type));
2411 			error = EINVAL;
2412 			goto done;
2413 		}
2414 
2415 #if CONFIG_MACF
2416 		/* pass in 0 for the offset argument because AMFI does not need the offset
2417 		 *       of the shared cache */
2418 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2419 		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2420 		if (error) {
2421 			goto done;
2422 		}
2423 #endif /* MAC */
2424 
2425 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2426 		/*
2427 		 * Check if the shared cache is in the trust cache;
2428 		 * if so, we can skip the root ownership check.
2429 		 */
2430 #if DEVELOPMENT || DEBUG
2431 		/*
2432 		 * Skip both root ownership and trust cache check if
2433 		 * enforcement is disabled.
2434 		 */
2435 		if (!cs_system_enforcement()) {
2436 			goto after_root_check;
2437 		}
2438 #endif /* DEVELOPMENT || DEBUG */
2439 		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2440 		if (blob == NULL) {
2441 			SHARED_REGION_TRACE_ERROR(
2442 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2443 				"missing CS blob\n",
2444 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2445 				proc_getpid(p), p->p_comm,
2446 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2447 				srfmp->vp->v_name));
2448 			goto root_check;
2449 		}
2450 		const uint8_t *cdhash = csblob_get_cdhash(blob);
2451 		if (cdhash == NULL) {
2452 			SHARED_REGION_TRACE_ERROR(
2453 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2454 				"missing cdhash\n",
2455 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2456 				proc_getpid(p), p->p_comm,
2457 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2458 				srfmp->vp->v_name));
2459 			goto root_check;
2460 		}
2461 
2462 		bool in_trust_cache = false;
2463 		TrustCacheQueryToken_t qt;
2464 		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2465 			TCType_t tc_type = kTCTypeInvalid;
2466 			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2467 			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2468 			    (tc_type == kTCTypeCryptex1BootOS ||
2469 			    tc_type == kTCTypeStatic ||
2470 			    tc_type == kTCTypeEngineering));
2471 		}
2472 		if (!in_trust_cache) {
2473 			SHARED_REGION_TRACE_ERROR(
2474 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2475 				"not in trust cache\n",
2476 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2477 				proc_getpid(p), p->p_comm,
2478 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2479 				srfmp->vp->v_name));
2480 			goto root_check;
2481 		}
2482 		goto after_root_check;
2483 root_check:
2484 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2485 
2486 		/* The shared cache file must be owned by root */
2487 		VATTR_INIT(&va);
2488 		VATTR_WANTED(&va, va_uid);
2489 		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2490 		if (error) {
2491 			SHARED_REGION_TRACE_ERROR(
2492 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2493 				"vnode_getattr(%p) failed (error=%d)\n",
2494 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2495 				proc_getpid(p), p->p_comm,
2496 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2497 				srfmp->vp->v_name,
2498 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2499 				error));
2500 			goto done;
2501 		}
2502 		if (va.va_uid != 0) {
2503 			SHARED_REGION_TRACE_ERROR(
2504 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2505 				"owned by uid=%d instead of 0\n",
2506 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2507 				proc_getpid(p), p->p_comm,
2508 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2509 				srfmp->vp->v_name, va.va_uid));
2510 			error = EPERM;
2511 			goto done;
2512 		}
2513 
2514 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2515 after_root_check:
2516 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2517 
2518 #if CONFIG_CSR
2519 		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2520 			VATTR_INIT(&va);
2521 			VATTR_WANTED(&va, va_flags);
2522 			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2523 			if (error) {
2524 				SHARED_REGION_TRACE_ERROR(
2525 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2526 					"vnode_getattr(%p) failed (error=%d)\n",
2527 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2528 					proc_getpid(p), p->p_comm,
2529 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2530 					srfmp->vp->v_name,
2531 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2532 					error));
2533 				goto done;
2534 			}
2535 
2536 			if (!(va.va_flags & SF_RESTRICTED)) {
2537 				/*
2538 				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2539 				 * the shared cache file is NOT SIP-protected, so reject the
2540 				 * mapping request
2541 				 */
2542 				SHARED_REGION_TRACE_ERROR(
2543 					("shared_region: %p [%d(%s)] map(%p:'%s'), "
2544 					"vnode is not SIP-protected. \n",
2545 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2546 					proc_getpid(p), p->p_comm,
2547 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2548 					srfmp->vp->v_name));
2549 				error = EPERM;
2550 				goto done;
2551 			}
2552 		}
2553 #else /* CONFIG_CSR */
2554 
2555 		/*
2556 		 * Devices without SIP/ROSP need to make sure that the shared cache
2557 		 * is either on the root volume or in the preboot cryptex volume.
2558 		 */
2559 		assert(rdir_vp != NULL);
2560 		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2561 			vnode_t preboot_vp = NULL;
2562 #if XNU_TARGET_OS_OSX
2563 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2564 #else
2565 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2566 #endif
2567 			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2568 			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2569 				SHARED_REGION_TRACE_ERROR(
2570 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2571 					"not on process' root volume nor preboot volume\n",
2572 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2573 					proc_getpid(p), p->p_comm,
2574 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2575 					srfmp->vp->v_name));
2576 				error = EPERM;
2577 				if (preboot_vp) {
2578 					(void)vnode_put(preboot_vp);
2579 				}
2580 				goto done;
2581 			} else if (preboot_vp) {
2582 				(void)vnode_put(preboot_vp);
2583 			}
2584 		}
2585 #endif /* CONFIG_CSR */
2586 
2587 		if (scdir_enforce) {
2588 			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2589 			struct vnode *scdir_vp = NULL;
2590 			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2591 			    *expected_scdir_path != NULL;
2592 			    expected_scdir_path++) {
2593 				/* get vnode for expected_scdir_path */
2594 				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2595 				if (error) {
2596 					SHARED_REGION_TRACE_ERROR(
2597 						("shared_region: %p [%d(%s)]: "
2598 						"vnode_lookup(%s) failed (error=%d)\n",
2599 						(void *)VM_KERNEL_ADDRPERM(current_thread()),
2600 						proc_getpid(p), p->p_comm,
2601 						*expected_scdir_path, error));
2602 					continue;
2603 				}
2604 
2605 				/* check if parent is scdir_vp */
2606 				assert(scdir_vp != NULL);
2607 				if (vnode_parent(srfmp->vp) == scdir_vp) {
2608 					(void)vnode_put(scdir_vp);
2609 					scdir_vp = NULL;
2610 					goto scdir_ok;
2611 				}
2612 				(void)vnode_put(scdir_vp);
2613 				scdir_vp = NULL;
2614 			}
2615 			/* nothing matches */
2616 			SHARED_REGION_TRACE_ERROR(
2617 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2618 				"shared cache file not in expected directory\n",
2619 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2620 				proc_getpid(p), p->p_comm,
2621 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2622 				srfmp->vp->v_name));
2623 			error = EPERM;
2624 			goto done;
2625 		}
2626 scdir_ok:
2627 
2628 		/* get vnode size */
2629 		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2630 		if (error) {
2631 			SHARED_REGION_TRACE_ERROR(
2632 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2633 				"vnode_size(%p) failed (error=%d)\n",
2634 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2635 				proc_getpid(p), p->p_comm,
2636 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2637 				srfmp->vp->v_name,
2638 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2639 			goto done;
2640 		}
2641 		srfmp->file_size = fs;
2642 
2643 		/* get the file's memory object handle */
2644 		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2645 		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2646 			SHARED_REGION_TRACE_ERROR(
2647 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2648 				"no memory object\n",
2649 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2650 				proc_getpid(p), p->p_comm,
2651 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2652 				srfmp->vp->v_name));
2653 			error = EINVAL;
2654 			goto done;
2655 		}
2656 
2657 		/* check that the mappings are properly covered by code signatures */
2658 		if (!cs_system_enforcement()) {
2659 			/* code signing is not enforced: no need to check */
2660 		} else {
2661 			for (i = 0; i < srfmp->mappings_count; i++) {
2662 				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2663 					/* zero-filled mapping: not backed by the file */
2664 					continue;
2665 				}
2666 				if (ubc_cs_is_range_codesigned(srfmp->vp,
2667 				    srfmp->mappings[i].sms_file_offset,
2668 				    srfmp->mappings[i].sms_size)) {
2669 					/* this mapping is fully covered by code signatures */
2670 					continue;
2671 				}
2672 				SHARED_REGION_TRACE_ERROR(
2673 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2674 					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2675 					"is not code-signed\n",
2676 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2677 					proc_getpid(p), p->p_comm,
2678 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2679 					srfmp->vp->v_name,
2680 					i, srfmp->mappings_count,
2681 					srfmp->mappings[i].sms_address,
2682 					srfmp->mappings[i].sms_size,
2683 					srfmp->mappings[i].sms_file_offset,
2684 					srfmp->mappings[i].sms_max_prot,
2685 					srfmp->mappings[i].sms_init_prot));
2686 				error = EINVAL;
2687 				goto done;
2688 			}
2689 		}
2690 	}
2691 done:
2692 	if (error != 0) {
2693 		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2694 		*sr_file_mappings = NULL;
2695 		*shared_region_ptr = NULL;
2696 	}
2697 	return error;
2698 }
2699 
2700 /*
2701  * shared_region_map_np()
2702  *
2703  * This system call is intended for dyld.
2704  *
2705  * dyld uses this to map a shared cache file into a shared region.
2706  * This is usually done only the first time a shared cache is needed.
2707  * Subsequent processes will just use the populated shared region without
2708  * requiring any further setup.
2709  */
2710 static int
_shared_region_map_and_slide(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings)2711 _shared_region_map_and_slide(
2712 	struct proc                         *p,
2713 	uint32_t                            files_count,
2714 	struct shared_file_np               *files,
2715 	uint32_t                            mappings_count,
2716 	struct shared_file_mapping_slide_np *mappings)
2717 {
2718 	int                             error = 0;
2719 	kern_return_t                   kr = KERN_SUCCESS;
2720 	struct _sr_file_mappings        *sr_file_mappings = NULL;
2721 	struct vnode                    *rdir_vp = NULL;
2722 	struct vm_shared_region         *shared_region = NULL;
2723 
2724 	/*
2725 	 * Get a reference to the current proc's root dir.
2726 	 * Need this to prevent racing with chroot.
2727 	 */
2728 	proc_fdlock(p);
2729 	rdir_vp = p->p_fd.fd_rdir;
2730 	if (rdir_vp == NULL) {
2731 		rdir_vp = rootvnode;
2732 	}
2733 	assert(rdir_vp != NULL);
2734 	vnode_get(rdir_vp);
2735 	proc_fdunlock(p);
2736 
2737 	/*
2738 	 * Turn files, mappings into sr_file_mappings and other setup.
2739 	 */
2740 	error = shared_region_map_and_slide_setup(p, files_count,
2741 	    files, mappings_count, mappings,
2742 	    &sr_file_mappings, &shared_region, rdir_vp);
2743 	if (error != 0) {
2744 		vnode_put(rdir_vp);
2745 		return error;
2746 	}
2747 
2748 	/* map the file(s) into that shared region's submap */
2749 	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2750 	if (kr != KERN_SUCCESS) {
2751 		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2752 		    "vm_shared_region_map_file() failed kr=0x%x\n",
2753 		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2754 		    proc_getpid(p), p->p_comm, kr));
2755 	}
2756 
2757 	/* convert kern_return_t to errno */
2758 	switch (kr) {
2759 	case KERN_SUCCESS:
2760 		error = 0;
2761 		break;
2762 	case KERN_INVALID_ADDRESS:
2763 		error = EFAULT;
2764 		break;
2765 	case KERN_PROTECTION_FAILURE:
2766 		error = EPERM;
2767 		break;
2768 	case KERN_NO_SPACE:
2769 		error = ENOMEM;
2770 		break;
2771 	case KERN_FAILURE:
2772 	case KERN_INVALID_ARGUMENT:
2773 	default:
2774 		error = EINVAL;
2775 		break;
2776 	}
2777 
2778 	/*
2779 	 * Mark that this process is now using split libraries.
2780 	 */
2781 	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2782 		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2783 	}
2784 
2785 	vnode_put(rdir_vp);
2786 	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
2787 
2788 	SHARED_REGION_TRACE_DEBUG(
2789 		("shared_region: %p [%d(%s)] <- map\n",
2790 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2791 		proc_getpid(p), p->p_comm));
2792 
2793 	return error;
2794 }
2795 
2796 /*
2797  * Clean up part of _shared_region_map_and_slide()
2798  * It had to be broken out of _shared_region_map_and_slide() to
2799  * prevent compiler inlining from blowing out the stack.
2800  */
2801 __attribute__((noinline))
2802 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region)2803 shared_region_map_and_slide_cleanup(
2804 	struct proc              *p,
2805 	uint32_t                 files_count,
2806 	struct _sr_file_mappings *sr_file_mappings,
2807 	struct vm_shared_region  *shared_region)
2808 {
2809 	struct _sr_file_mappings *srfmp;
2810 	struct vnode_attr        va;
2811 
2812 	if (sr_file_mappings != NULL) {
2813 		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2814 			if (srfmp->vp != NULL) {
2815 				vnode_lock_spin(srfmp->vp);
2816 				srfmp->vp->v_flag |= VSHARED_DYLD;
2817 				vnode_unlock(srfmp->vp);
2818 
2819 				/* update the vnode's access time */
2820 				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2821 					VATTR_INIT(&va);
2822 					nanotime(&va.va_access_time);
2823 					VATTR_SET_ACTIVE(&va, va_access_time);
2824 					vnode_setattr(srfmp->vp, &va, vfs_context_current());
2825 				}
2826 
2827 #if NAMEDSTREAMS
2828 				/*
2829 				 * If the shared cache is compressed, it may
2830 				 * have a namedstream vnode instantiated for
2831 				 * for it. That namedstream vnode will also
2832 				 * have to be marked with VSHARED_DYLD.
2833 				 */
2834 				if (vnode_hasnamedstreams(srfmp->vp)) {
2835 					vnode_t svp;
2836 					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2837 					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
2838 						vnode_lock_spin(svp);
2839 						svp->v_flag |= VSHARED_DYLD;
2840 						vnode_unlock(svp);
2841 						vnode_put(svp);
2842 					}
2843 				}
2844 #endif /* NAMEDSTREAMS */
2845 				/*
2846 				 * release the vnode...
2847 				 * ubc_map() still holds it for us in the non-error case
2848 				 */
2849 				(void) vnode_put(srfmp->vp);
2850 				srfmp->vp = NULL;
2851 			}
2852 			if (srfmp->fp != NULL) {
2853 				/* release the file descriptor */
2854 				fp_drop(p, srfmp->fd, srfmp->fp, 0);
2855 				srfmp->fp = NULL;
2856 			}
2857 		}
2858 		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2859 	}
2860 
2861 	if (shared_region != NULL) {
2862 		vm_shared_region_deallocate(shared_region);
2863 	}
2864 }
2865 
2866 
2867 /*
2868  * For each file mapped, we may have mappings for:
2869  *    TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2870  * so let's round up to 8 mappings per file.
2871  */
2872 #define SFM_MAX       (_SR_FILE_MAPPINGS_MAX_FILES * 8)     /* max mapping structs allowed to pass in */
2873 
2874 /*
2875  * This is the new interface for setting up shared region mappings.
2876  *
2877  * The slide used for shared regions setup using this interface is done differently
2878  * from the old interface. The slide value passed in the shared_files_np represents
2879  * a max value. The kernel will choose a random value based on that, then use it
2880  * for all shared regions.
2881  */
2882 #if defined (__x86_64__)
2883 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2884 #else
2885 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2886 #endif
2887 
2888 int
shared_region_map_and_slide_2_np(struct proc * p,struct shared_region_map_and_slide_2_np_args * uap,__unused int * retvalp)2889 shared_region_map_and_slide_2_np(
2890 	struct proc                                  *p,
2891 	struct shared_region_map_and_slide_2_np_args *uap,
2892 	__unused int                                 *retvalp)
2893 {
2894 	unsigned int                  files_count;
2895 	struct shared_file_np         *shared_files = NULL;
2896 	unsigned int                  mappings_count;
2897 	struct shared_file_mapping_slide_np *mappings = NULL;
2898 	kern_return_t                 kr = KERN_SUCCESS;
2899 
2900 	files_count = uap->files_count;
2901 	mappings_count = uap->mappings_count;
2902 
2903 	if (files_count == 0) {
2904 		SHARED_REGION_TRACE_INFO(
2905 			("shared_region: %p [%d(%s)] map(): "
2906 			"no files\n",
2907 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2908 			proc_getpid(p), p->p_comm));
2909 		kr = 0; /* no files to map: we're done ! */
2910 		goto done;
2911 	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
2912 		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
2913 		if (shared_files == NULL) {
2914 			kr = KERN_RESOURCE_SHORTAGE;
2915 			goto done;
2916 		}
2917 	} else {
2918 		SHARED_REGION_TRACE_ERROR(
2919 			("shared_region: %p [%d(%s)] map(): "
2920 			"too many files (%d) max %d\n",
2921 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2922 			proc_getpid(p), p->p_comm,
2923 			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
2924 		kr = KERN_FAILURE;
2925 		goto done;
2926 	}
2927 
2928 	if (mappings_count == 0) {
2929 		SHARED_REGION_TRACE_INFO(
2930 			("shared_region: %p [%d(%s)] map(): "
2931 			"no mappings\n",
2932 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2933 			proc_getpid(p), p->p_comm));
2934 		kr = 0; /* no mappings: we're done ! */
2935 		goto done;
2936 	} else if (mappings_count <= SFM_MAX) {
2937 		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
2938 		if (mappings == NULL) {
2939 			kr = KERN_RESOURCE_SHORTAGE;
2940 			goto done;
2941 		}
2942 	} else {
2943 		SHARED_REGION_TRACE_ERROR(
2944 			("shared_region: %p [%d(%s)] map(): "
2945 			"too many mappings (%d) max %d\n",
2946 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2947 			proc_getpid(p), p->p_comm,
2948 			mappings_count, SFM_MAX));
2949 		kr = KERN_FAILURE;
2950 		goto done;
2951 	}
2952 
2953 	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
2954 	if (kr != KERN_SUCCESS) {
2955 		goto done;
2956 	}
2957 
2958 	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
2959 	if (kr != KERN_SUCCESS) {
2960 		goto done;
2961 	}
2962 
2963 	uint32_t max_slide = shared_files[0].sf_slide;
2964 	uint32_t random_val;
2965 	uint32_t slide_amount;
2966 
2967 	if (max_slide != 0) {
2968 		read_random(&random_val, sizeof random_val);
2969 		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
2970 	} else {
2971 		slide_amount = 0;
2972 	}
2973 #if DEVELOPMENT || DEBUG
2974 	extern bool bootarg_disable_aslr;
2975 	if (bootarg_disable_aslr) {
2976 		slide_amount = 0;
2977 	}
2978 #endif /* DEVELOPMENT || DEBUG */
2979 
2980 	/*
2981 	 * Fix up the mappings to reflect the desired slide.
2982 	 */
2983 	unsigned int f;
2984 	unsigned int m = 0;
2985 	unsigned int i;
2986 	for (f = 0; f < files_count; ++f) {
2987 		shared_files[f].sf_slide = slide_amount;
2988 		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
2989 			if (m >= mappings_count) {
2990 				SHARED_REGION_TRACE_ERROR(
2991 					("shared_region: %p [%d(%s)] map(): "
2992 					"mapping count argument was too small\n",
2993 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2994 					proc_getpid(p), p->p_comm));
2995 				kr = KERN_FAILURE;
2996 				goto done;
2997 			}
2998 			mappings[m].sms_address += slide_amount;
2999 			if (mappings[m].sms_slide_size != 0) {
3000 				mappings[m].sms_slide_start += slide_amount;
3001 			}
3002 		}
3003 	}
3004 
3005 	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
3006 done:
3007 	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
3008 	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
3009 	return kr;
3010 }
3011 
3012 /*
3013  * A syscall for dyld to use to map data pages that need load time relocation fixups.
3014  * The fixups are performed by a custom pager during page-in, so the pages still appear
3015  * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
3016  * on demand later, all w/o using the compressor.
3017  *
3018  * Note these page are treated as MAP_PRIVATE. So if the application dirties any pages while
3019  * running, they are COW'd as normal.
3020  */
3021 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)3022 map_with_linking_np(
3023 	struct proc                     *p,
3024 	struct map_with_linking_np_args *uap,
3025 	__unused int                    *retvalp)
3026 {
3027 	uint32_t                        region_count;
3028 	uint32_t                        r;
3029 	struct mwl_region               *regions = NULL;
3030 	struct mwl_region               *rp;
3031 	uint32_t                        link_info_size;
3032 	void                            *link_info = NULL;      /* starts with a struct mwl_info_hdr */
3033 	struct mwl_info_hdr             *info_hdr = NULL;
3034 	uint64_t                        binds_size;
3035 	int                             fd;
3036 	struct fileproc                 *fp = NULL;
3037 	struct vnode                    *vp = NULL;
3038 	size_t                          file_size;
3039 	off_t                           fs;
3040 	struct vnode_attr               va;
3041 	memory_object_control_t         file_control = NULL;
3042 	int                             error;
3043 	kern_return_t                   kr = KERN_SUCCESS;
3044 
3045 	/*
3046 	 * Check if dyld has told us it finished with this call.
3047 	 */
3048 	if (p->p_disallow_map_with_linking) {
3049 		printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3050 		    __func__, proc_getpid(p), p->p_comm);
3051 		kr = KERN_FAILURE;
3052 		goto done;
3053 	}
3054 
3055 	/*
3056 	 * First we do some sanity checking on what dyld has passed us.
3057 	 */
3058 	region_count = uap->region_count;
3059 	link_info_size = uap->link_info_size;
3060 	if (region_count == 0) {
3061 		printf("%s: [%d(%s)]: region_count == 0\n",
3062 		    __func__, proc_getpid(p), p->p_comm);
3063 		kr = KERN_FAILURE;
3064 		goto done;
3065 	}
3066 	if (region_count > MWL_MAX_REGION_COUNT) {
3067 		printf("%s: [%d(%s)]: region_count too big %d\n",
3068 		    __func__, proc_getpid(p), p->p_comm, region_count);
3069 		kr = KERN_FAILURE;
3070 		goto done;
3071 	}
3072 
3073 	if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3074 		printf("%s: [%d(%s)]: link_info_size too small\n",
3075 		    __func__, proc_getpid(p), p->p_comm);
3076 		kr = KERN_FAILURE;
3077 		goto done;
3078 	}
3079 	if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3080 		printf("%s: [%d(%s)]: link_info_size too big %d\n",
3081 		    __func__, proc_getpid(p), p->p_comm, link_info_size);
3082 		kr = KERN_FAILURE;
3083 		goto done;
3084 	}
3085 
3086 	/*
3087 	 * Allocate and copyin the regions and link info
3088 	 */
3089 	regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3090 	if (regions == NULL) {
3091 		printf("%s: [%d(%s)]: failed to allocate regions\n",
3092 		    __func__, proc_getpid(p), p->p_comm);
3093 		kr = KERN_RESOURCE_SHORTAGE;
3094 		goto done;
3095 	}
3096 	kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3097 	if (kr != KERN_SUCCESS) {
3098 		printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3099 		    __func__, proc_getpid(p), p->p_comm, kr);
3100 		goto done;
3101 	}
3102 
3103 	link_info = kalloc_data(link_info_size, Z_WAITOK);
3104 	if (link_info == NULL) {
3105 		printf("%s: [%d(%s)]: failed to allocate link_info\n",
3106 		    __func__, proc_getpid(p), p->p_comm);
3107 		kr = KERN_RESOURCE_SHORTAGE;
3108 		goto done;
3109 	}
3110 	kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3111 	if (kr != KERN_SUCCESS) {
3112 		printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3113 		    __func__, proc_getpid(p), p->p_comm, kr);
3114 		goto done;
3115 	}
3116 
3117 	/*
3118 	 * Do some verification the data structures.
3119 	 */
3120 	info_hdr = (struct mwl_info_hdr *)link_info;
3121 	if (info_hdr->mwli_version != MWL_INFO_VERS) {
3122 		printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3123 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3124 		kr = KERN_FAILURE;
3125 		goto done;
3126 	}
3127 
3128 	if (info_hdr->mwli_binds_offset > link_info_size) {
3129 		printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3130 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3131 		kr = KERN_FAILURE;
3132 		goto done;
3133 	}
3134 
3135 	/* some older devs have s/w page size > h/w page size, no need to support them */
3136 	if (info_hdr->mwli_page_size != PAGE_SIZE) {
3137 		/* no printf, since this is expected on some devices */
3138 		kr = KERN_INVALID_ARGUMENT;
3139 		goto done;
3140 	}
3141 
3142 	binds_size = (uint64_t)info_hdr->mwli_binds_count *
3143 	    ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3144 	if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3145 		printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3146 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3147 		kr = KERN_FAILURE;
3148 		goto done;
3149 	}
3150 
3151 	if (info_hdr->mwli_chains_offset > link_info_size) {
3152 		printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3153 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3154 		kr = KERN_FAILURE;
3155 		goto done;
3156 	}
3157 
3158 
3159 	/*
3160 	 * Ensure the chained starts in the link info and make sure the
3161 	 * segment info offsets are within bounds.
3162 	 */
3163 	if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3164 		printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3165 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3166 		kr = KERN_FAILURE;
3167 		goto done;
3168 	}
3169 	if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3170 		printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3171 		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3172 		kr = KERN_FAILURE;
3173 		goto done;
3174 	}
3175 
3176 	/* Note that more verification of offsets is done in the pager itself */
3177 
3178 	/*
3179 	 * Ensure we've only been given one FD and verify valid protections.
3180 	 */
3181 	fd = regions[0].mwlr_fd;
3182 	for (r = 0; r < region_count; ++r) {
3183 		if (regions[r].mwlr_fd != fd) {
3184 			printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3185 			    __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3186 			kr = KERN_FAILURE;
3187 			goto done;
3188 		}
3189 
3190 		/*
3191 		 * Only allow data mappings and not zero fill. Permit TPRO
3192 		 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
3193 		 */
3194 		if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3195 			printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3196 			    __func__, proc_getpid(p), p->p_comm);
3197 			kr = KERN_FAILURE;
3198 			goto done;
3199 		}
3200 		if (regions[r].mwlr_protections & VM_PROT_ZF) {
3201 			printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
3202 			    __func__, proc_getpid(p), p->p_comm, r);
3203 			kr = KERN_FAILURE;
3204 			goto done;
3205 		}
3206 		if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
3207 		    !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
3208 			printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
3209 			    __func__, proc_getpid(p), p->p_comm, r);
3210 			kr = KERN_FAILURE;
3211 			goto done;
3212 		}
3213 	}
3214 
3215 
3216 	/* get file structure from file descriptor */
3217 	error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3218 	if (error) {
3219 		printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3220 		    __func__, proc_getpid(p), p->p_comm, error);
3221 		kr = KERN_FAILURE;
3222 		goto done;
3223 	}
3224 
3225 	/* We need at least read permission on the file */
3226 	if (!(fp->fp_glob->fg_flag & FREAD)) {
3227 		printf("%s: [%d(%s)]: not readable\n",
3228 		    __func__, proc_getpid(p), p->p_comm);
3229 		kr = KERN_FAILURE;
3230 		goto done;
3231 	}
3232 
3233 	/* Get the vnode from file structure */
3234 	vp = (struct vnode *)fp_get_data(fp);
3235 	error = vnode_getwithref(vp);
3236 	if (error) {
3237 		printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3238 		    __func__, proc_getpid(p), p->p_comm, error);
3239 		kr = KERN_FAILURE;
3240 		vp = NULL; /* just to be sure */
3241 		goto done;
3242 	}
3243 
3244 	/* Make sure the vnode is a regular file */
3245 	if (vp->v_type != VREG) {
3246 		printf("%s: [%d(%s)]: vnode not VREG\n",
3247 		    __func__, proc_getpid(p), p->p_comm);
3248 		kr = KERN_FAILURE;
3249 		goto done;
3250 	}
3251 
3252 	/* get vnode size */
3253 	error = vnode_size(vp, &fs, vfs_context_current());
3254 	if (error) {
3255 		goto done;
3256 	}
3257 	file_size = fs;
3258 
3259 	/* get the file's memory object handle */
3260 	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3261 	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3262 		printf("%s: [%d(%s)]: no memory object\n",
3263 		    __func__, proc_getpid(p), p->p_comm);
3264 		kr = KERN_FAILURE;
3265 		goto done;
3266 	}
3267 
3268 	for (r = 0; r < region_count; ++r) {
3269 		rp = &regions[r];
3270 
3271 #if CONFIG_MACF
3272 		vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3273 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3274 		    fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3275 		if (error) {
3276 			printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3277 			    __func__, proc_getpid(p), p->p_comm, r, error);
3278 			kr = KERN_FAILURE;
3279 			goto done;
3280 		}
3281 #endif /* MAC */
3282 
3283 		/* check that the mappings are properly covered by code signatures */
3284 		if (cs_system_enforcement()) {
3285 			if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3286 				printf("%s: [%d(%s)]: region %d, not code signed\n",
3287 				    __func__, proc_getpid(p), p->p_comm, r);
3288 				kr = KERN_FAILURE;
3289 				goto done;
3290 			}
3291 		}
3292 	}
3293 
3294 	/* update the vnode's access time */
3295 	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3296 		VATTR_INIT(&va);
3297 		nanotime(&va.va_access_time);
3298 		VATTR_SET_ACTIVE(&va, va_access_time);
3299 		vnode_setattr(vp, &va, vfs_context_current());
3300 	}
3301 
3302 	/* get the VM to do the work */
3303 	kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3304 
3305 done:
3306 	if (fp != NULL) {
3307 		/* release the file descriptor */
3308 		fp_drop(p, fd, fp, 0);
3309 	}
3310 	if (vp != NULL) {
3311 		(void)vnode_put(vp);
3312 	}
3313 	if (regions != NULL) {
3314 		kfree_data(regions, region_count * sizeof(regions[0]));
3315 	}
3316 	/* link info is used in the pager if things worked */
3317 	if (link_info != NULL && kr != KERN_SUCCESS) {
3318 		kfree_data(link_info, link_info_size);
3319 	}
3320 
3321 	switch (kr) {
3322 	case KERN_SUCCESS:
3323 		return 0;
3324 	case KERN_RESOURCE_SHORTAGE:
3325 		return ENOMEM;
3326 	default:
3327 		return EINVAL;
3328 	}
3329 }
3330 
3331 #if DEBUG || DEVELOPMENT
3332 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
3333     CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
3334 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
3335     CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
3336 #endif /* DEBUG || DEVELOPMENT */
3337 
3338 /* sysctl overflow room */
3339 
3340 SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
3341     (int *) &page_size, 0, "vm page size");
3342 
3343 /* vm_page_free_target is provided as a makeshift solution for applications that want to
3344  *       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
3345  *       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
3346 extern unsigned int     vm_page_free_target;
3347 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
3348     &vm_page_free_target, 0, "Pageout daemon free target");
3349 
3350 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
3351     &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3352 
3353 static int
3354 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3355 {
3356 #pragma unused(oidp, arg1, arg2)
3357 	unsigned int page_free_wanted;
3358 
3359 	page_free_wanted = mach_vm_ctl_page_free_wanted();
3360 	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3361 }
3362 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
3363     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3364     0, 0, vm_ctl_page_free_wanted, "I", "");
3365 
3366 extern unsigned int     vm_page_purgeable_count;
3367 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3368     &vm_page_purgeable_count, 0, "Purgeable page count");
3369 
3370 extern unsigned int     vm_page_purgeable_wired_count;
3371 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3372     &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
3373 
3374 extern unsigned int vm_page_kern_lpage_count;
3375 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3376     &vm_page_kern_lpage_count, 0, "kernel used large pages");
3377 
#if DEVELOPMENT || DEBUG
/* 1 when the kernel is built with __ARM_MIXED_PAGE_SIZE__, 0 otherwise */
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported,
    CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0,
    "kernel support for mixed pagesize");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count,
    "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed,
    "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0,
    "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0,
    "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0,
    "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0,
    "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0,
    "Cleaned pages fault reactivated");
/* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0,
    "Cleaned pages reactivated");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned,
    "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0,
    "Cleaned pages reference reactivated");
/* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0,
    "");
#endif /* DEVELOPMENT || DEBUG */
3412 
3413 extern int madvise_free_debug;
3414 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
3415     &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
3416 extern int madvise_free_debug_sometimes;
3417 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
3418     &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
3419 
3420 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3421     &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
3422 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3423     &vm_page_stats_reusable.reusable_pages_success, "");
3424 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3425     &vm_page_stats_reusable.reusable_pages_failure, "");
3426 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3427     &vm_page_stats_reusable.reusable_pages_shared, "");
3428 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3429     &vm_page_stats_reusable.all_reusable_calls, "");
3430 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3431     &vm_page_stats_reusable.partial_reusable_calls, "");
3432 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3433     &vm_page_stats_reusable.reuse_pages_success, "");
3434 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3435     &vm_page_stats_reusable.reuse_pages_failure, "");
3436 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3437     &vm_page_stats_reusable.all_reuse_calls, "");
3438 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3439     &vm_page_stats_reusable.partial_reuse_calls, "");
3440 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3441     &vm_page_stats_reusable.can_reuse_success, "");
3442 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3443     &vm_page_stats_reusable.can_reuse_failure, "");
3444 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
3445     &vm_page_stats_reusable.reusable_reclaimed, "");
3446 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
3447     &vm_page_stats_reusable.reusable_nonwritable, "");
3448 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3449     &vm_page_stats_reusable.reusable_shared, "");
3450 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3451     &vm_page_stats_reusable.free_shared, "");
3452 
3453 
3454 extern unsigned int vm_page_free_count, vm_page_speculative_count;
3455 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
3456 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
3457 
3458 extern unsigned int vm_page_cleaned_count;
3459 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
3460 
3461 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
3462 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
3463 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
3464 
3465 /* pageout counts */
3466 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
3467 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
3468 
3469 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
3470 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
3471 SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3472 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
3473 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3474 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
3475 
3476 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
3477 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
3478 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
3479 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
3480 extern unsigned int vm_page_realtime_count;
3481 SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
3482 extern int vm_pageout_protect_realtime;
3483 SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");
3484 
3485 /* counts of pages prefaulted when entering a memory object */
3486 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
3487 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
3488 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3489 
3490 #if defined (__x86_64__)
3491 extern unsigned int vm_clump_promote_threshold;
3492 SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
3493 #if DEVELOPMENT || DEBUG
3494 extern unsigned long vm_clump_stats[];
3495 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
3496 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
3497 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
3498 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
3499 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
3500 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
3501 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
3502 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
3503 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
3504 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
3505 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
3506 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
3507 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
3508 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
3509 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
3510 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
3511 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
3512 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
3513 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
3514 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
3515 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
3516 #endif  /* if DEVELOPMENT || DEBUG */
3517 #endif  /* #if defined (__x86_64__) */
3518 
#if CONFIG_SECLUDED_MEMORY

/* read-only secluded-memory counters */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED,
    &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded_count_over_target, 0, "");

/* read-only views of the fields of vm_page_secluded */
extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3545 
3546 #pragma mark Deferred Reclaim
3547 
3548 #if CONFIG_DEFERRED_RECLAIM
3549 
3550 #if DEVELOPMENT || DEBUG
3551 /*
3552  * VM reclaim testing
3553  */
3554 extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
3555 
3556 static int
3557 sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
3558 {
3559 #pragma unused(arg1, arg2)
3560 	int error = EINVAL, pid = 0;
3561 	/*
3562 	 * Only send on write
3563 	 */
3564 	error = sysctl_handle_int(oidp, &pid, 0, req);
3565 	if (error || !req->newptr) {
3566 		return error;
3567 	}
3568 
3569 	bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
3570 	if (success) {
3571 		error = 0;
3572 	}
3573 
3574 	return error;
3575 }
3576 
3577 SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
3578     CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
3579     &sysctl_vm_reclaim_drain_async_queue, "I", "");
3580 
3581 
3582 extern uint64_t vm_reclaim_max_threshold;
3583 extern uint64_t vm_reclaim_trim_divisor;
3584 
3585 SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
3586 SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
3587 #endif /* DEVELOPMENT || DEBUG */
3588 
3589 #endif /* CONFIG_DEFERRED_RECLAIM */
3590 
3591 #include <kern/thread.h>
3592 #include <sys/user.h>
3593 
3594 void vm_pageout_io_throttle(void);
3595 
3596 void
vm_pageout_io_throttle(void)3597 vm_pageout_io_throttle(void)
3598 {
3599 	struct uthread *uthread = current_uthread();
3600 
3601 	/*
3602 	 * thread is marked as a low priority I/O type
3603 	 * and the I/O we issued while in this cleaning operation
3604 	 * collided with normal I/O operations... we'll
3605 	 * delay in order to mitigate the impact of this
3606 	 * task on the normal operation of the system
3607 	 */
3608 
3609 	if (uthread->uu_lowpri_window) {
3610 		throttle_lowpri_io(1);
3611 	}
3612 }
3613 
3614 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3615 vm_pressure_monitor(
3616 	__unused struct proc *p,
3617 	struct vm_pressure_monitor_args *uap,
3618 	int *retval)
3619 {
3620 	kern_return_t   kr;
3621 	uint32_t        pages_reclaimed;
3622 	uint32_t        pages_wanted;
3623 
3624 	kr = mach_vm_pressure_monitor(
3625 		(boolean_t) uap->wait_for_pressure,
3626 		uap->nsecs_monitored,
3627 		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3628 		&pages_wanted);
3629 
3630 	switch (kr) {
3631 	case KERN_SUCCESS:
3632 		break;
3633 	case KERN_ABORTED:
3634 		return EINTR;
3635 	default:
3636 		return EINVAL;
3637 	}
3638 
3639 	if (uap->pages_reclaimed) {
3640 		if (copyout((void *)&pages_reclaimed,
3641 		    uap->pages_reclaimed,
3642 		    sizeof(pages_reclaimed)) != 0) {
3643 			return EFAULT;
3644 		}
3645 	}
3646 
3647 	*retval = (int) pages_wanted;
3648 	return 0;
3649 }
3650 
3651 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3652 kas_info(struct proc *p,
3653     struct kas_info_args *uap,
3654     int *retval __unused)
3655 {
3656 #ifndef CONFIG_KAS_INFO
3657 	(void)p;
3658 	(void)uap;
3659 	return ENOTSUP;
3660 #else /* CONFIG_KAS_INFO */
3661 	int                     selector = uap->selector;
3662 	user_addr_t     valuep = uap->value;
3663 	user_addr_t     sizep = uap->size;
3664 	user_size_t size, rsize;
3665 	int                     error;
3666 
3667 	if (!kauth_cred_issuser(kauth_cred_get())) {
3668 		return EPERM;
3669 	}
3670 
3671 #if CONFIG_MACF
3672 	error = mac_system_check_kas_info(kauth_cred_get(), selector);
3673 	if (error) {
3674 		return error;
3675 	}
3676 #endif
3677 
3678 	if (IS_64BIT_PROCESS(p)) {
3679 		user64_size_t size64;
3680 		error = copyin(sizep, &size64, sizeof(size64));
3681 		size = (user_size_t)size64;
3682 	} else {
3683 		user32_size_t size32;
3684 		error = copyin(sizep, &size32, sizeof(size32));
3685 		size = (user_size_t)size32;
3686 	}
3687 	if (error) {
3688 		return error;
3689 	}
3690 
3691 	switch (selector) {
3692 	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3693 	{
3694 		uint64_t slide = vm_kernel_slide;
3695 
3696 		if (sizeof(slide) != size) {
3697 			return EINVAL;
3698 		}
3699 
3700 		error = copyout(&slide, valuep, sizeof(slide));
3701 		if (error) {
3702 			return error;
3703 		}
3704 		rsize = size;
3705 	}
3706 	break;
3707 	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3708 	{
3709 		uint32_t i;
3710 		kernel_mach_header_t *mh = &_mh_execute_header;
3711 		struct load_command *cmd;
3712 		cmd = (struct load_command*) &mh[1];
3713 		uint64_t *bases;
3714 		rsize = mh->ncmds * sizeof(uint64_t);
3715 
3716 		/*
3717 		 * Return the size if no data was passed
3718 		 */
3719 		if (valuep == 0) {
3720 			break;
3721 		}
3722 
3723 		if (rsize > size) {
3724 			return EINVAL;
3725 		}
3726 
3727 		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3728 
3729 		for (i = 0; i < mh->ncmds; i++) {
3730 			if (cmd->cmd == LC_SEGMENT_KERNEL) {
3731 				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3732 				bases[i] = (uint64_t)sg->vmaddr;
3733 			}
3734 			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3735 		}
3736 
3737 		error = copyout(bases, valuep, rsize);
3738 
3739 		kfree_data(bases, rsize);
3740 
3741 		if (error) {
3742 			return error;
3743 		}
3744 	}
3745 	break;
3746 	case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR:
3747 	case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR:
3748 	{
3749 #if CONFIG_SPTM
3750 		const uint64_t slide =
3751 		    (selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide;
3752 #else
3753 		const uint64_t slide = 0;
3754 #endif
3755 
3756 		if (sizeof(slide) != size) {
3757 			return EINVAL;
3758 		}
3759 
3760 		error = copyout(&slide, valuep, sizeof(slide));
3761 		if (error) {
3762 			return error;
3763 		}
3764 		rsize = size;
3765 	}
3766 	break;
3767 	default:
3768 		return EINVAL;
3769 	}
3770 
3771 	if (IS_64BIT_PROCESS(p)) {
3772 		user64_size_t size64 = (user64_size_t)rsize;
3773 		error = copyout(&size64, sizep, sizeof(size64));
3774 	} else {
3775 		user32_size_t size32 = (user32_size_t)rsize;
3776 		error = copyout(&size32, sizep, sizeof(size32));
3777 	}
3778 
3779 	return error;
3780 #endif /* CONFIG_KAS_INFO */
3781 }
3782 
3783 #if __has_feature(ptrauth_calls)
3784 /*
3785  * Generate a random pointer signing key that isn't 0.
3786  */
3787 uint64_t
generate_jop_key(void)3788 generate_jop_key(void)
3789 {
3790 	uint64_t key;
3791 
3792 	do {
3793 		read_random(&key, sizeof key);
3794 	} while (key == 0);
3795 	return key;
3796 }
3797 #endif /* __has_feature(ptrauth_calls) */
3798 
3799 
3800 #pragma clang diagnostic push
3801 #pragma clang diagnostic ignored "-Wcast-qual"
3802 #pragma clang diagnostic ignored "-Wunused-function"
3803 
3804 static void
asserts()3805 asserts()
3806 {
3807 	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3808 	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3809 }
3810 
3811 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
3812 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
3813 #pragma clang diagnostic pop
3814 
3815 extern uint32_t vm_page_pages;
3816 SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
3817 
3818 extern uint32_t vm_page_busy_absent_skipped;
3819 SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
3820 
3821 extern uint32_t vm_page_upl_tainted;
3822 SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
3823 
3824 extern uint32_t vm_page_iopl_tainted;
3825 SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3826 
3827 #if __arm64__ && (DEVELOPMENT || DEBUG)
3828 extern int vm_footprint_suspend_allowed;
3829 SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
3830 
3831 extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3832 static int
3833 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3834 {
3835 #pragma unused(oidp, arg1, arg2)
3836 	int error = 0;
3837 	int new_value;
3838 
3839 	if (req->newptr == USER_ADDR_NULL) {
3840 		return 0;
3841 	}
3842 	error = SYSCTL_IN(req, &new_value, sizeof(int));
3843 	if (error) {
3844 		return error;
3845 	}
3846 	if (!vm_footprint_suspend_allowed) {
3847 		if (new_value != 0) {
3848 			/* suspends are not allowed... */
3849 			return 0;
3850 		}
3851 		/* ... but let resumes proceed */
3852 	}
3853 	DTRACE_VM2(footprint_suspend,
3854 	    vm_map_t, current_map(),
3855 	    int, new_value);
3856 
3857 	pmap_footprint_suspend(current_map(), new_value);
3858 
3859 	return 0;
3860 }
3861 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
3862     CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3863     0, 0, &sysctl_vm_footprint_suspend, "I", "");
3864 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3865 
3866 extern uint64_t vm_map_corpse_footprint_count;
3867 extern uint64_t vm_map_corpse_footprint_size_avg;
3868 extern uint64_t vm_map_corpse_footprint_size_max;
3869 extern uint64_t vm_map_corpse_footprint_full;
3870 extern uint64_t vm_map_corpse_footprint_no_buf;
3871 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
3872     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
3873 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
3874     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
3875 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
3876     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
3877 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
3878     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
3879 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
3880     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3881 
3882 #if CODE_SIGNING_MONITOR
3883 extern uint64_t vm_cs_defer_to_csm;
3884 extern uint64_t vm_cs_defer_to_csm_not;
3885 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
3886     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
3887 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
3888     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
3889 #endif /* CODE_SIGNING_MONITOR */
3890 
3891 extern uint64_t shared_region_pager_copied;
3892 extern uint64_t shared_region_pager_slid;
3893 extern uint64_t shared_region_pager_slid_error;
3894 extern uint64_t shared_region_pager_reclaimed;
3895 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
3896     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
3897 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
3898     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
3899 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
3900     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
3901 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
3902     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
3903 extern int shared_region_destroy_delay;
3904 SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
3905     CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3906 
3907 #if MACH_ASSERT
3908 extern int pmap_ledgers_panic_leeway;
3909 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
3910 #endif /* MACH_ASSERT */
3911 
3912 
3913 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
3914 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
3915 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
3916 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
3917 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
3918 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
3919 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
3920 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
3921 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
3922 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
3923 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
3924 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
3925 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
3926 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
3927     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
3928 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
3929     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
3930 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
3931     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
3932 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
3933     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
3934 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
3935     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
3936 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
3937     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
3938 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
3939     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
3940 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
3941     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
3942 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
3943     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
3944 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
3945     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
3946 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
3947     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
3948 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
3949     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
3950 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
3951     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3952 
3953 extern int vm_protect_privileged_from_untrusted;
3954 SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
3955     CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
3956 extern uint64_t vm_copied_on_read;
3957 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
3958     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
3959 
3960 extern int vm_shared_region_count;
3961 extern int vm_shared_region_peak;
3962 SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
3963     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
3964 SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
3965     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
3966 #if DEVELOPMENT || DEBUG
3967 extern unsigned int shared_region_pagers_resident_count;
3968 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
3969     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
3970 extern unsigned int shared_region_pagers_resident_peak;
3971 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
3972     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
3973 extern int shared_region_pager_count;
3974 SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
3975     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
3976 #if __has_feature(ptrauth_calls)
3977 extern int shared_region_key_count;
3978 SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
3979     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
3980 extern int vm_shared_region_reslide_count;
3981 SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
3982     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
3983 #endif /* __has_feature(ptrauth_calls) */
3984 #endif /* DEVELOPMENT || DEBUG */
3985 
3986 #if MACH_ASSERT
3987 extern int debug4k_filter;
3988 SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
3989 extern int debug4k_panic_on_terminate;
3990 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
3991 extern int debug4k_panic_on_exception;
3992 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
3993 extern int debug4k_panic_on_misaligned_sharing;
3994 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
3995 #endif /* MACH_ASSERT */
3996 
3997 extern uint64_t vm_map_set_size_limit_count;
3998 extern uint64_t vm_map_set_data_limit_count;
3999 extern uint64_t vm_map_enter_RLIMIT_AS_count;
4000 extern uint64_t vm_map_enter_RLIMIT_DATA_count;
4001 SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
4002 SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
4003 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
4004 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
4005 
4006 extern uint64_t vm_fault_resilient_media_initiate;
4007 extern uint64_t vm_fault_resilient_media_retry;
4008 extern uint64_t vm_fault_resilient_media_proceed;
4009 extern uint64_t vm_fault_resilient_media_release;
4010 extern uint64_t vm_fault_resilient_media_abort1;
4011 extern uint64_t vm_fault_resilient_media_abort2;
4012 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
4013 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
4014 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
4015 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
4016 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
4017 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
4018 #if MACH_ASSERT
4019 extern int vm_fault_resilient_media_inject_error1_rate;
4020 extern int vm_fault_resilient_media_inject_error1;
4021 extern int vm_fault_resilient_media_inject_error2_rate;
4022 extern int vm_fault_resilient_media_inject_error2;
4023 extern int vm_fault_resilient_media_inject_error3_rate;
4024 extern int vm_fault_resilient_media_inject_error3;
4025 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
4026 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
4027 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
4028 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
4029 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
4030 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
4031 #endif /* MACH_ASSERT */
4032 
4033 extern uint64_t pmap_query_page_info_retries;
4034 SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
4035 
4036 /*
4037  * A sysctl which causes all existing shared regions to become stale. They
4038  * will no longer be used by anything new and will be torn down as soon as
4039  * the last existing user exits. A write of non-zero value causes that to happen.
4040  * This should only be used by launchd, so we check that this is initproc.
4041  */
4042 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4043 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4044 {
4045 	unsigned int value = 0;
4046 	int changed = 0;
4047 	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
4048 	if (error || !changed) {
4049 		return error;
4050 	}
4051 	if (current_proc() != initproc) {
4052 		return EPERM;
4053 	}
4054 
4055 	vm_shared_region_pivot();
4056 
4057 	return 0;
4058 }
4059 
4060 SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
4061     CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
4062     0, 0, shared_region_pivot, "I", "");
4063 
4064 extern uint64_t vm_object_shadow_forced;
4065 extern uint64_t vm_object_shadow_skipped;
4066 SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
4067     &vm_object_shadow_forced, "");
4068 SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
4069     &vm_object_shadow_skipped, "");
4070 
4071 SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
4072     &vmtc_total, 0, "total text page corruptions detected");
4073 
4074 
4075 #if DEBUG || DEVELOPMENT
4076 /*
4077  * A sysctl that can be used to corrupt a text page with an illegal instruction.
4078  * Used for testing text page self healing.
4079  */
4080 extern kern_return_t vm_corrupt_text_addr(uintptr_t);
4081 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4082 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4083 {
4084 	uint64_t value = 0;
4085 	int error = sysctl_handle_quad(oidp, &value, 0, req);
4086 	if (error || !req->newptr) {
4087 		return error;
4088 	}
4089 
4090 	if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
4091 		return 0;
4092 	} else {
4093 		return EINVAL;
4094 	}
4095 }
4096 
4097 SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
4098     CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
4099     0, 0, corrupt_text_addr, "-", "");
4100 #endif /* DEBUG || DEVELOPMENT */
4101 
4102 #if CONFIG_MAP_RANGES
4103 /*
4104  * vm.malloc_ranges
4105  *
4106  * space-separated list of <left:right> hexadecimal addresses.
4107  */
4108 static int
4109 vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
4110 {
4111 	vm_map_t map = current_map();
4112 	struct mach_vm_range r1, r2;
4113 	char str[20 * 4];
4114 	int len;
4115 
4116 	if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
4117 		return ENOENT;
4118 	}
4119 	if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
4120 		return ENOENT;
4121 	}
4122 
4123 	len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
4124 	    r1.max_address, r2.min_address,
4125 	    r2.max_address, get_map_max(map));
4126 
4127 	return SYSCTL_OUT(req, str, len);
4128 }
4129 
4130 SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
4131     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
4132     0, 0, &vm_map_malloc_ranges, "A", "");
4133 
4134 #if DEBUG || DEVELOPMENT
4135 static int
4136 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4137 {
4138 #pragma unused(arg1, arg2, oidp)
4139 	struct mach_vm_range range;
4140 
4141 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4142 	    != KERN_SUCCESS) {
4143 		return EINVAL;
4144 	}
4145 
4146 	return SYSCTL_OUT(req, &range, sizeof(range));
4147 }
4148 
4149 static int
4150 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4151 {
4152 #pragma unused(arg1, arg2, oidp)
4153 	struct mach_vm_range range;
4154 
4155 	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4156 	    != KERN_SUCCESS) {
4157 		return EINVAL;
4158 	}
4159 
4160 	return SYSCTL_OUT(req, &range, sizeof(range));
4161 }
4162 
4163 /*
4164  * A sysctl that can be used to return ranges for the current VM map.
4165  * Used for testing VM ranges.
4166  */
4167 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4168     0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
4169 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4170     0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
4171 
4172 #endif /* DEBUG || DEVELOPMENT */
4173 #endif /* CONFIG_MAP_RANGES */
4174 
4175 #if DEBUG || DEVELOPMENT
4176 #endif /* DEBUG || DEVELOPMENT */
4177 
4178 extern uint64_t vm_map_range_overflows_count;
4179 SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
4180 extern boolean_t vm_map_range_overflows_log;
4181 SYSCTL_INT(_vm, OID_AUTO, map_range_oveflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");
4182 
4183 extern uint64_t c_seg_filled_no_contention;
4184 extern uint64_t c_seg_filled_contention;
4185 extern clock_sec_t c_seg_filled_contention_sec_max;
4186 extern clock_nsec_t c_seg_filled_contention_nsec_max;
4187 SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
4188 SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
4189 SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
4190 SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
4191 #if (XNU_TARGET_OS_OSX && __arm64__)
4192 extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
4193 extern int c_process_major_yield_after; /* yield after moving ? segments */
4194 extern uint64_t c_process_major_reports;
4195 extern clock_sec_t c_process_major_max_sec;
4196 extern clock_nsec_t c_process_major_max_nsec;
4197 extern uint32_t c_process_major_peak_segcount;
4198 SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
4199 SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
4200 SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
4201 SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
4202 SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
4203 SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
4204 #endif /* (XNU_TARGET_OS_OSX && __arm64__) */
4205 
#if DEVELOPMENT || DEBUG
/* vm.panic_object_not_alive: read-write integer, declared CTLFLAG_ANYBODY. */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive,
    CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
    &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* vm.fbdp_no_panic: read-write integer, declared CTLFLAG_ANYBODY. */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic,
    CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
    &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4215 
4216