xref: /xnu-8020.101.4/bsd/vm/vm_unix.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Mach Operating System
30  * Copyright (c) 1987 Carnegie-Mellon University
31  * All rights reserved.  The CMU software License Agreement specifies
32  * the terms and conditions for use and redistribution.
33  */
34 /*
35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36  * support for mandatory and extensible security protections.  This notice
37  * is included in support of clause 2.2 (b) of the Apple Public License,
38  * Version 2.0.
39  */
40 #include <vm/vm_options.h>
41 
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57 
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #if NECP
86 #include <net/necp.h>
87 #endif /* NECP */
88 #if SKYWALK
89 #include <skywalk/os_channel.h>
90 #endif /* SKYWALK */
91 
92 #include <security/audit/audit.h>
93 #include <security/mac.h>
94 #include <bsm/audit_kevents.h>
95 
96 #include <kern/kalloc.h>
97 #include <vm/vm_map.h>
98 #include <vm/vm_kern.h>
99 #include <vm/vm_pageout.h>
100 
101 #include <mach/shared_region.h>
102 #include <vm/vm_shared_region.h>
103 
104 #include <vm/vm_protos.h>
105 
106 #include <sys/kern_memorystatus.h>
107 #include <sys/kern_memorystatus_freeze.h>
108 #include <sys/proc_internal.h>
109 
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113 
114 #include <kern/bits.h>
115 
116 #if CONFIG_CSR
117 #include <sys/csr.h>
118 #endif /* CONFIG_CSR */
119 #include <IOKit/IOBSD.h>
120 
121 #if VM_MAP_DEBUG_APPLE_PROTECT
122 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
123 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
124 
125 #if VM_MAP_DEBUG_FOURK
126 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
127 #endif /* VM_MAP_DEBUG_FOURK */
128 
129 #if DEVELOPMENT || DEBUG
130 
131 static int
132 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
133 {
134 #pragma unused(arg1, arg2)
135 	vm_offset_t     kaddr;
136 	kern_return_t   kr;
137 	int     error = 0;
138 	int     size = 0;
139 
140 	error = sysctl_handle_int(oidp, &size, 0, req);
141 	if (error || !req->newptr) {
142 		return error;
143 	}
144 
145 	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT);
146 
147 	if (kr == KERN_SUCCESS) {
148 		kmem_free(kernel_map, kaddr, size);
149 	}
150 
151 	return error;
152 }
153 
154 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
155     0, 0, &sysctl_kmem_alloc_contig, "I", "");
156 
157 extern int vm_region_footprint;
158 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
159 
160 #endif /* DEVELOPMENT || DEBUG */
161 
162 static int
163 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
164 {
165 #pragma unused(arg1, arg2, oidp)
166 	int     error = 0;
167 	int     value;
168 
169 	value = task_self_region_footprint();
170 	error = SYSCTL_OUT(req, &value, sizeof(int));
171 	if (error) {
172 		return error;
173 	}
174 
175 	if (!req->newptr) {
176 		return 0;
177 	}
178 
179 	error = SYSCTL_IN(req, &value, sizeof(int));
180 	if (error) {
181 		return error;
182 	}
183 	task_self_region_footprint_set(value);
184 	return 0;
185 }
186 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
187 
188 static int
189 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
190 {
191 #pragma unused(arg1, arg2, oidp)
192 	int     error = 0;
193 	int     value;
194 
195 	value = (1 << thread_self_region_page_shift());
196 	error = SYSCTL_OUT(req, &value, sizeof(int));
197 	if (error) {
198 		return error;
199 	}
200 
201 	if (!req->newptr) {
202 		return 0;
203 	}
204 
205 	error = SYSCTL_IN(req, &value, sizeof(int));
206 	if (error) {
207 		return error;
208 	}
209 
210 	if (value != 0 && value != 4096 && value != 16384) {
211 		return EINVAL;
212 	}
213 
214 #if !__ARM_MIXED_PAGE_SIZE__
215 	if (value != vm_map_page_size(current_map())) {
216 		return EINVAL;
217 	}
218 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
219 
220 	thread_self_region_page_shift_set(bit_first(value));
221 	return 0;
222 }
223 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
224 
225 
226 #if DEVELOPMENT || DEBUG
227 extern int panic_on_unsigned_execute;
228 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
229 #endif /* DEVELOPMENT || DEBUG */
230 
231 extern int cs_executable_create_upl;
232 extern int cs_executable_wire;
233 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
234 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
235 
236 extern int apple_protect_pager_count;
237 extern int apple_protect_pager_count_mapped;
238 extern unsigned int apple_protect_pager_cache_limit;
239 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
240 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
241 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
242 
243 #if DEVELOPMENT || DEBUG
244 extern int radar_20146450;
245 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
246 
247 extern int macho_printf;
248 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
249 
250 extern int apple_protect_pager_data_request_debug;
251 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
252 
253 #if __arm__ || __arm64__
254 /* These are meant to support the page table accounting unit test. */
255 extern unsigned int arm_hardware_page_size;
256 extern unsigned int arm_pt_desc_size;
257 extern unsigned int arm_pt_root_size;
258 extern unsigned int free_page_size_tt_count;
259 extern unsigned int free_two_page_size_tt_count;
260 extern unsigned int free_tt_count;
261 extern unsigned int inuse_user_tteroot_count;
262 extern unsigned int inuse_kernel_tteroot_count;
263 extern unsigned int inuse_user_ttepages_count;
264 extern unsigned int inuse_kernel_ttepages_count;
265 extern unsigned int inuse_user_ptepages_count;
266 extern unsigned int inuse_kernel_ptepages_count;
267 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
268 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
269 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
270 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
271 SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
272 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
273 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
274 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
275 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
276 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
277 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
278 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
279 #if DEVELOPMENT || DEBUG
280 extern unsigned long pmap_asid_flushes;
281 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
282 extern unsigned long pmap_asid_hits;
283 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
284 extern unsigned long pmap_asid_misses;
285 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
286 #endif
287 #endif /* __arm__ || __arm64__ */
288 
289 #if __arm64__
290 extern int fourk_pager_data_request_debug;
291 SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
292 #endif /* __arm64__ */
293 #endif /* DEVELOPMENT || DEBUG */
294 
295 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
296 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
297 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
298 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
299 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
300 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
301 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
302 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
303 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
304 #if VM_SCAN_FOR_SHADOW_CHAIN
305 static int vm_shadow_max_enabled = 0;    /* Disabled by default */
306 extern int proc_shadow_max(void);
307 static int
308 vm_shadow_max SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2, oidp)
311 	int value = 0;
312 
313 	if (vm_shadow_max_enabled) {
314 		value = proc_shadow_max();
315 	}
316 
317 	return SYSCTL_OUT(req, &value, sizeof(value));
318 }
319 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
320     0, 0, &vm_shadow_max, "I", "");
321 
322 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
323 
324 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
325 
326 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
327 
328 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
329 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
330 /*
331  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
332  */
333 
334 #if DEVELOPMENT || DEBUG
335 extern int allow_stack_exec, allow_data_exec;
336 
337 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
338 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
339 
340 #endif /* DEVELOPMENT || DEBUG */
341 
342 static const char *prot_values[] = {
343 	"none",
344 	"read-only",
345 	"write-only",
346 	"read-write",
347 	"execute-only",
348 	"read-execute",
349 	"write-execute",
350 	"read-write-execute"
351 };
352 
353 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)354 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
355 {
356 	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
357 	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
358 }
359 
360 /*
361  * shared_region_unnest_logging: level of logging of unnesting events
362  * 0	- no logging
363  * 1	- throttled logging of unexpected unnesting events (default)
364  * 2	- unthrottled logging of unexpected unnesting events
365  * 3+	- unthrottled logging of all unnesting events
366  */
367 int shared_region_unnest_logging = 1;
368 
369 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
370     &shared_region_unnest_logging, 0, "");
371 
372 int vm_shared_region_unnest_log_interval = 10;
373 int shared_region_unnest_log_count_threshold = 5;
374 
375 /*
376  * Shared cache path enforcement.
377  */
378 
379 #if XNU_TARGET_OS_OSX
380 
381 #if defined (__x86_64__)
382 static int scdir_enforce = 1;
383 #else /* defined (__x86_64__) */
384 static int scdir_enforce = 0;   /* AOT caches live elsewhere */
385 #endif /* defined (__x86_64__) */
386 
387 static char scdir_path[] = "/System/Library/dyld/";
388 
389 #else /* XNU_TARGET_OS_OSX */
390 
391 static int scdir_enforce = 0;
392 static char scdir_path[] = "/System/Library/Caches/com.apple.dyld/";
393 
394 #endif /* XNU_TARGET_OS_OSX */
395 
396 static char driverkit_scdir_path[] = "/System/DriverKit/System/Library/dyld/";
397 
398 #ifndef SECURE_KERNEL
399 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
400 {
401 #if CONFIG_CSR
402 	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
403 		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
404 		return EPERM;
405 	}
406 #endif /* CONFIG_CSR */
407 	return sysctl_handle_int(oidp, arg1, arg2, req);
408 }
409 
410 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
411 #endif
412 
413 /* These log rate throttling state variables aren't thread safe, but
414  * are sufficient unto the task.
415  */
416 static int64_t last_unnest_log_time = 0;
417 static int shared_region_unnest_log_count = 0;
418 
419 void
log_unnest_badness(vm_map_t m,vm_map_offset_t s,vm_map_offset_t e,boolean_t is_nested_map,vm_map_offset_t lowest_unnestable_addr)420 log_unnest_badness(
421 	vm_map_t        m,
422 	vm_map_offset_t s,
423 	vm_map_offset_t e,
424 	boolean_t       is_nested_map,
425 	vm_map_offset_t lowest_unnestable_addr)
426 {
427 	struct timeval  tv;
428 
429 	if (shared_region_unnest_logging == 0) {
430 		return;
431 	}
432 
433 	if (shared_region_unnest_logging <= 2 &&
434 	    is_nested_map &&
435 	    s >= lowest_unnestable_addr) {
436 		/*
437 		 * Unnesting of writable map entries is fine.
438 		 */
439 		return;
440 	}
441 
442 	if (shared_region_unnest_logging <= 1) {
443 		microtime(&tv);
444 		if ((tv.tv_sec - last_unnest_log_time) <
445 		    vm_shared_region_unnest_log_interval) {
446 			if (shared_region_unnest_log_count++ >
447 			    shared_region_unnest_log_count_threshold) {
448 				return;
449 			}
450 		} else {
451 			last_unnest_log_time = tv.tv_sec;
452 			shared_region_unnest_log_count = 0;
453 		}
454 	}
455 
456 	DTRACE_VM4(log_unnest_badness,
457 	    vm_map_t, m,
458 	    vm_map_offset_t, s,
459 	    vm_map_offset_t, e,
460 	    vm_map_offset_t, lowest_unnestable_addr);
461 	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
462 }
463 
464 int
useracc(user_addr_t addr,user_size_t len,int prot)465 useracc(
466 	user_addr_t     addr,
467 	user_size_t     len,
468 	int     prot)
469 {
470 	vm_map_t        map;
471 
472 	map = current_map();
473 	return vm_map_check_protection(
474 		map,
475 		vm_map_trunc_page(addr,
476 		vm_map_page_mask(map)),
477 		vm_map_round_page(addr + len,
478 		vm_map_page_mask(map)),
479 		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
480 }
481 
482 int
vslock(user_addr_t addr,user_size_t len)483 vslock(
484 	user_addr_t     addr,
485 	user_size_t     len)
486 {
487 	kern_return_t   kret;
488 	vm_map_t        map;
489 
490 	map = current_map();
491 	kret = vm_map_wire_kernel(map,
492 	    vm_map_trunc_page(addr,
493 	    vm_map_page_mask(map)),
494 	    vm_map_round_page(addr + len,
495 	    vm_map_page_mask(map)),
496 	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
497 	    FALSE);
498 
499 	switch (kret) {
500 	case KERN_SUCCESS:
501 		return 0;
502 	case KERN_INVALID_ADDRESS:
503 	case KERN_NO_SPACE:
504 		return ENOMEM;
505 	case KERN_PROTECTION_FAILURE:
506 		return EACCES;
507 	default:
508 		return EINVAL;
509 	}
510 }
511 
512 int
vsunlock(user_addr_t addr,user_size_t len,__unused int dirtied)513 vsunlock(
514 	user_addr_t addr,
515 	user_size_t len,
516 	__unused int dirtied)
517 {
518 #if FIXME  /* [ */
519 	pmap_t          pmap;
520 	vm_page_t       pg;
521 	vm_map_offset_t vaddr;
522 	ppnum_t         paddr;
523 #endif  /* FIXME ] */
524 	kern_return_t   kret;
525 	vm_map_t        map;
526 
527 	map = current_map();
528 
529 #if FIXME  /* [ */
530 	if (dirtied) {
531 		pmap = get_task_pmap(current_task());
532 		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
533 		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
534 		    vaddr += PAGE_SIZE) {
535 			paddr = pmap_find_phys(pmap, vaddr);
536 			pg = PHYS_TO_VM_PAGE(paddr);
537 			vm_page_set_modified(pg);
538 		}
539 	}
540 #endif  /* FIXME ] */
541 #ifdef  lint
542 	dirtied++;
543 #endif  /* lint */
544 	kret = vm_map_unwire(map,
545 	    vm_map_trunc_page(addr,
546 	    vm_map_page_mask(map)),
547 	    vm_map_round_page(addr + len,
548 	    vm_map_page_mask(map)),
549 	    FALSE);
550 	switch (kret) {
551 	case KERN_SUCCESS:
552 		return 0;
553 	case KERN_INVALID_ADDRESS:
554 	case KERN_NO_SPACE:
555 		return ENOMEM;
556 	case KERN_PROTECTION_FAILURE:
557 		return EACCES;
558 	default:
559 		return EINVAL;
560 	}
561 }
562 
563 int
subyte(user_addr_t addr,int byte)564 subyte(
565 	user_addr_t addr,
566 	int byte)
567 {
568 	char character;
569 
570 	character = (char)byte;
571 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
572 }
573 
574 int
suibyte(user_addr_t addr,int byte)575 suibyte(
576 	user_addr_t addr,
577 	int byte)
578 {
579 	char character;
580 
581 	character = (char)byte;
582 	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
583 }
584 
585 int
fubyte(user_addr_t addr)586 fubyte(user_addr_t addr)
587 {
588 	unsigned char byte;
589 
590 	if (copyin(addr, (void *) &byte, sizeof(char))) {
591 		return -1;
592 	}
593 	return byte;
594 }
595 
596 int
fuibyte(user_addr_t addr)597 fuibyte(user_addr_t addr)
598 {
599 	unsigned char byte;
600 
601 	if (copyin(addr, (void *) &(byte), sizeof(char))) {
602 		return -1;
603 	}
604 	return byte;
605 }
606 
607 int
suword(user_addr_t addr,long word)608 suword(
609 	user_addr_t addr,
610 	long word)
611 {
612 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
613 }
614 
615 long
fuword(user_addr_t addr)616 fuword(user_addr_t addr)
617 {
618 	long word = 0;
619 
620 	if (copyin(addr, (void *) &word, sizeof(int))) {
621 		return -1;
622 	}
623 	return word;
624 }
625 
626 /* suiword and fuiword are the same as suword and fuword, respectively */
627 
628 int
suiword(user_addr_t addr,long word)629 suiword(
630 	user_addr_t addr,
631 	long word)
632 {
633 	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
634 }
635 
636 long
fuiword(user_addr_t addr)637 fuiword(user_addr_t addr)
638 {
639 	long word = 0;
640 
641 	if (copyin(addr, (void *) &word, sizeof(int))) {
642 		return -1;
643 	}
644 	return word;
645 }
646 
647 /*
648  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
649  * fetching and setting of process-sized size_t and pointer values.
650  */
651 int
sulong(user_addr_t addr,int64_t word)652 sulong(user_addr_t addr, int64_t word)
653 {
654 	if (IS_64BIT_PROCESS(current_proc())) {
655 		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
656 	} else {
657 		return suiword(addr, (long)word);
658 	}
659 }
660 
661 int64_t
fulong(user_addr_t addr)662 fulong(user_addr_t addr)
663 {
664 	int64_t longword;
665 
666 	if (IS_64BIT_PROCESS(current_proc())) {
667 		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
668 			return -1;
669 		}
670 		return longword;
671 	} else {
672 		return (int64_t)fuiword(addr);
673 	}
674 }
675 
676 int
suulong(user_addr_t addr,uint64_t uword)677 suulong(user_addr_t addr, uint64_t uword)
678 {
679 	if (IS_64BIT_PROCESS(current_proc())) {
680 		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
681 	} else {
682 		return suiword(addr, (uint32_t)uword);
683 	}
684 }
685 
686 uint64_t
fuulong(user_addr_t addr)687 fuulong(user_addr_t addr)
688 {
689 	uint64_t ulongword;
690 
691 	if (IS_64BIT_PROCESS(current_proc())) {
692 		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
693 			return -1ULL;
694 		}
695 		return ulongword;
696 	} else {
697 		return (uint64_t)fuiword(addr);
698 	}
699 }
700 
701 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)702 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
703 {
704 	return ENOTSUP;
705 }
706 
707 /*
708  * pid_for_task
709  *
710  * Find the BSD process ID for the Mach task associated with the given Mach port
711  * name
712  *
713  * Parameters:	args		User argument descriptor (see below)
714  *
715  * Indirect parameters:	args->t		Mach port name
716  *                      args->pid	Process ID (returned value; see below)
717  *
718  * Returns:	KERL_SUCCESS	Success
719  *              KERN_FAILURE	Not success
720  *
721  * Implicit returns: args->pid		Process ID
722  *
723  */
724 kern_return_t
pid_for_task(struct pid_for_task_args * args)725 pid_for_task(
726 	struct pid_for_task_args *args)
727 {
728 	mach_port_name_t        t = args->t;
729 	user_addr_t             pid_addr  = args->pid;
730 	proc_t p;
731 	task_t          t1;
732 	int     pid = -1;
733 	kern_return_t   err = KERN_SUCCESS;
734 
735 	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
736 	AUDIT_ARG(mach_port1, t);
737 
738 	t1 = port_name_to_task_name(t);
739 
740 	if (t1 == TASK_NULL) {
741 		err = KERN_FAILURE;
742 		goto pftout;
743 	} else {
744 		p = get_bsdtask_info(t1);
745 		if (p) {
746 			pid  = proc_pid(p);
747 			err = KERN_SUCCESS;
748 		} else if (is_corpsetask(t1)) {
749 			pid = task_pid(t1);
750 			err = KERN_SUCCESS;
751 		} else {
752 			err = KERN_FAILURE;
753 		}
754 	}
755 	task_deallocate(t1);
756 pftout:
757 	AUDIT_ARG(pid, pid);
758 	(void) copyout((char *) &pid, pid_addr, sizeof(int));
759 	AUDIT_MACH_SYSCALL_EXIT(err);
760 	return err;
761 }
762 
763 /*
764  *
765  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
766  * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
767  *
768  */
769 static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
770 
771 /*
772  *	Routine:	task_for_pid_posix_check
773  *	Purpose:
774  *			Verify that the current process should be allowed to
775  *			get the target process's task port. This is only
776  *			permitted if:
777  *			- The current process is root
778  *			OR all of the following are true:
779  *			- The target process's real, effective, and saved uids
780  *			  are the same as the current proc's euid,
781  *			- The target process's group set is a subset of the
782  *			  calling process's group set, and
783  *			- The target process hasn't switched credentials.
784  *
785  *	Returns:	TRUE: permitted
786  *			FALSE: denied
787  */
788 static int
task_for_pid_posix_check(proc_t target)789 task_for_pid_posix_check(proc_t target)
790 {
791 	kauth_cred_t targetcred, mycred;
792 	uid_t myuid;
793 	int allowed;
794 
795 	/* No task_for_pid on bad targets */
796 	if (target->p_stat == SZOMB) {
797 		return FALSE;
798 	}
799 
800 	mycred = kauth_cred_get();
801 	myuid = kauth_cred_getuid(mycred);
802 
803 	/* If we're running as root, the check passes */
804 	if (kauth_cred_issuser(mycred)) {
805 		return TRUE;
806 	}
807 
808 	/* We're allowed to get our own task port */
809 	if (target == current_proc()) {
810 		return TRUE;
811 	}
812 
813 	/*
814 	 * Under DENY, only root can get another proc's task port,
815 	 * so no more checks are needed.
816 	 */
817 	if (tfp_policy == KERN_TFP_POLICY_DENY) {
818 		return FALSE;
819 	}
820 
821 	targetcred = kauth_cred_proc_ref(target);
822 	allowed = TRUE;
823 
824 	/* Do target's ruid, euid, and saved uid match my euid? */
825 	if ((kauth_cred_getuid(targetcred) != myuid) ||
826 	    (kauth_cred_getruid(targetcred) != myuid) ||
827 	    (kauth_cred_getsvuid(targetcred) != myuid)) {
828 		allowed = FALSE;
829 		goto out;
830 	}
831 
832 	/* Are target's groups a subset of my groups? */
833 	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
834 	    allowed == 0) {
835 		allowed = FALSE;
836 		goto out;
837 	}
838 
839 	/* Has target switched credentials? */
840 	if (target->p_flag & P_SUGID) {
841 		allowed = FALSE;
842 		goto out;
843 	}
844 
845 out:
846 	kauth_cred_unref(&targetcred);
847 	return allowed;
848 }
849 
850 /*
851  *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
852  *
853  *	Description:	Waits for the user space daemon to respond to the request
854  *			we made. Function declared non inline to be visible in
855  *			stackshots and spindumps as well as debugging.
856  */
857 __attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(mach_port_t task_access_port,int32_t calling_pid,uint32_t calling_gid,int32_t target_pid,mach_task_flavor_t flavor)858 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
859 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
860 {
861 	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
862 }
863 
864 /*
865  *	Routine:	task_for_pid
866  *	Purpose:
867  *		Get the task port for another "process", named by its
868  *		process ID on the same host as "target_task".
869  *
870  *		Only permitted to privileged processes, or processes
871  *		with the same user ID.
872  *
873  *		Note: if pid == 0, an error is return no matter who is calling.
874  *
875  * XXX This should be a BSD system call, not a Mach trap!!!
876  */
877 kern_return_t
task_for_pid(struct task_for_pid_args * args)878 task_for_pid(
879 	struct task_for_pid_args *args)
880 {
881 	mach_port_name_t        target_tport = args->target_tport;
882 	int                     pid = args->pid;
883 	user_addr_t             task_addr = args->t;
884 	proc_t                  p = PROC_NULL;
885 	task_t                  t1 = TASK_NULL;
886 	task_t                  task = TASK_NULL;
887 	mach_port_name_t        tret = MACH_PORT_NULL;
888 	ipc_port_t              tfpport = MACH_PORT_NULL;
889 	void                    * sright = NULL;
890 	int                     error = 0;
891 	boolean_t               is_current_proc = FALSE;
892 	struct proc_ident       pident = {0};
893 
894 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
895 	AUDIT_ARG(pid, pid);
896 	AUDIT_ARG(mach_port1, target_tport);
897 
898 	/* Always check if pid == 0 */
899 	if (pid == 0) {
900 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
901 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
902 		return KERN_FAILURE;
903 	}
904 
905 	t1 = port_name_to_task(target_tport);
906 	if (t1 == TASK_NULL) {
907 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
908 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
909 		return KERN_FAILURE;
910 	}
911 
912 
913 	p = proc_find(pid);
914 	if (p == PROC_NULL) {
915 		error = KERN_FAILURE;
916 		goto tfpout;
917 	}
918 	pident = proc_ident(p);
919 	is_current_proc = (p == current_proc());
920 
921 #if CONFIG_AUDIT
922 	AUDIT_ARG(process, p);
923 #endif
924 
925 	if (!(task_for_pid_posix_check(p))) {
926 		error = KERN_FAILURE;
927 		goto tfpout;
928 	}
929 
930 	if (p->task == TASK_NULL) {
931 		error = KERN_SUCCESS;
932 		goto tfpout;
933 	}
934 
935 	/*
936 	 * Grab a task reference and drop the proc reference as the proc ref
937 	 * shouldn't be held accross upcalls.
938 	 */
939 	task = p->task;
940 	task_reference(task);
941 
942 	proc_rele(p);
943 	p = PROC_NULL;
944 
945 #if CONFIG_MACF
946 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
947 	if (error) {
948 		error = KERN_FAILURE;
949 		goto tfpout;
950 	}
951 #endif
952 
953 	/* If we aren't root and target's task access port is set... */
954 	if (!kauth_cred_issuser(kauth_cred_get()) &&
955 	    !is_current_proc &&
956 	    (task_get_task_access_port(task, &tfpport) == 0) &&
957 	    (tfpport != IPC_PORT_NULL)) {
958 		if (tfpport == IPC_PORT_DEAD) {
959 			error = KERN_PROTECTION_FAILURE;
960 			goto tfpout;
961 		}
962 
963 		/* Call up to the task access server */
964 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
965 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
966 
967 		if (error != MACH_MSG_SUCCESS) {
968 			if (error == MACH_RCV_INTERRUPTED) {
969 				error = KERN_ABORTED;
970 			} else {
971 				error = KERN_FAILURE;
972 			}
973 			goto tfpout;
974 		}
975 	}
976 
977 	/* Grant task port access */
978 	extmod_statistics_incr_task_for_pid(task);
979 
980 	if (task == current_task()) {
981 		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
982 		sright = (void *)convert_task_to_port_pinned(task);
983 	} else {
984 		sright = (void *)convert_task_to_port(task);
985 	}
986 
987 	/* Check if the task has been corpsified */
988 	if (is_corpsetask(task)) {
989 		/* task ref consumed by convert_task_to_port */
990 		task = TASK_NULL;
991 		ipc_port_release_send(sright);
992 		error = KERN_FAILURE;
993 		goto tfpout;
994 	}
995 
996 	/* task ref consumed by convert_task_to_port */
997 	task = TASK_NULL;
998 	tret = ipc_port_copyout_send(
999 		sright,
1000 		get_task_ipcspace(current_task()));
1001 
1002 	error = KERN_SUCCESS;
1003 
1004 tfpout:
1005 	task_deallocate(t1);
1006 	AUDIT_ARG(mach_port2, tret);
1007 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1008 
1009 	if (tfpport != IPC_PORT_NULL) {
1010 		ipc_port_release_send(tfpport);
1011 	}
1012 	if (task != TASK_NULL) {
1013 		task_deallocate(task);
1014 	}
1015 	if (p != PROC_NULL) {
1016 		proc_rele(p);
1017 	}
1018 	AUDIT_MACH_SYSCALL_EXIT(error);
1019 	return error;
1020 }
1021 
1022 /*
1023  *	Routine:	task_name_for_pid
1024  *	Purpose:
1025  *		Get the task name port for another "process", named by its
1026  *		process ID on the same host as "target_task".
1027  *
1028  *		Only permitted to privileged processes, or processes
1029  *		with the same user ID.
1030  *
1031  * XXX This should be a BSD system call, not a Mach trap!!!
1032  */
1033 
1034 kern_return_t
task_name_for_pid(struct task_name_for_pid_args * args)1035 task_name_for_pid(
1036 	struct task_name_for_pid_args *args)
1037 {
1038 	mach_port_name_t        target_tport = args->target_tport;
1039 	int                     pid = args->pid;
1040 	user_addr_t             task_addr = args->t;
1041 	proc_t                  p = PROC_NULL;
1042 	task_t                  t1 = TASK_NULL;
1043 	mach_port_name_t        tret = MACH_PORT_NULL;
1044 	void * sright;
1045 	int error = 0, refheld = 0;
1046 	kauth_cred_t target_cred;
1047 
1048 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
1049 	AUDIT_ARG(pid, pid);
1050 	AUDIT_ARG(mach_port1, target_tport);
1051 
1052 	t1 = port_name_to_task(target_tport);
1053 	if (t1 == TASK_NULL) {
1054 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1055 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1056 		return KERN_FAILURE;
1057 	}
1058 
1059 	p = proc_find(pid);
1060 	if (p != PROC_NULL) {
1061 		AUDIT_ARG(process, p);
1062 		target_cred = kauth_cred_proc_ref(p);
1063 		refheld = 1;
1064 
1065 		if ((p->p_stat != SZOMB)
1066 		    && ((current_proc() == p)
1067 		    || kauth_cred_issuser(kauth_cred_get())
1068 		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
1069 		    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
1070 			if (p->task != TASK_NULL) {
1071 				struct proc_ident pident = proc_ident(p);
1072 
1073 				task_t task = p->task;
1074 
1075 				task_reference(p->task);
1076 				proc_rele(p);
1077 				p = PROC_NULL;
1078 #if CONFIG_MACF
1079 				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
1080 				if (error) {
1081 					task_deallocate(task);
1082 					goto noperm;
1083 				}
1084 #endif
1085 				sright = (void *)convert_task_name_to_port(task);
1086 				task = NULL;
1087 				tret = ipc_port_copyout_send(sright,
1088 				    get_task_ipcspace(current_task()));
1089 			} else {
1090 				tret  = MACH_PORT_NULL;
1091 			}
1092 
1093 			AUDIT_ARG(mach_port2, tret);
1094 			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1095 			task_deallocate(t1);
1096 			error = KERN_SUCCESS;
1097 			goto tnfpout;
1098 		}
1099 	}
1100 
1101 #if CONFIG_MACF
1102 noperm:
1103 #endif
1104 	task_deallocate(t1);
1105 	tret = MACH_PORT_NULL;
1106 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1107 	error = KERN_FAILURE;
1108 tnfpout:
1109 	if (refheld != 0) {
1110 		kauth_cred_unref(&target_cred);
1111 	}
1112 	if (p != PROC_NULL) {
1113 		proc_rele(p);
1114 	}
1115 	AUDIT_MACH_SYSCALL_EXIT(error);
1116 	return error;
1117 }
1118 
1119 /*
1120  *	Routine:	task_inspect_for_pid
1121  *	Purpose:
1122  *		Get the task inspect port for another "process", named by its
1123  *		process ID on the same host as "target_task".
1124  */
1125 int
task_inspect_for_pid(struct proc * p __unused,struct task_inspect_for_pid_args * args,int * ret)1126 task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
1127 {
1128 	mach_port_name_t        target_tport = args->target_tport;
1129 	int                     pid = args->pid;
1130 	user_addr_t             task_addr = args->t;
1131 
1132 	proc_t                  proc = PROC_NULL;
1133 	task_t                  t1 = TASK_NULL;
1134 	task_inspect_t          task_insp = TASK_INSPECT_NULL;
1135 	mach_port_name_t        tret = MACH_PORT_NULL;
1136 	ipc_port_t              tfpport = MACH_PORT_NULL;
1137 	int                     error = 0;
1138 	void                    *sright = NULL;
1139 	boolean_t               is_current_proc = FALSE;
1140 	struct proc_ident       pident = {0};
1141 
1142 	/* Disallow inspect port for kernel_task */
1143 	if (pid == 0) {
1144 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1145 		return EPERM;
1146 	}
1147 
1148 	t1 = port_name_to_task(target_tport);
1149 	if (t1 == TASK_NULL) {
1150 		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1151 		return EINVAL;
1152 	}
1153 
1154 	proc = proc_find(pid);
1155 	if (proc == PROC_NULL) {
1156 		error = ESRCH;
1157 		goto tifpout;
1158 	}
1159 	pident = proc_ident(proc);
1160 	is_current_proc = (proc == current_proc());
1161 
1162 	if (!(task_for_pid_posix_check(proc))) {
1163 		error = EPERM;
1164 		goto tifpout;
1165 	}
1166 
1167 	task_insp = proc->task;
1168 	if (task_insp == TASK_INSPECT_NULL) {
1169 		goto tifpout;
1170 	}
1171 
1172 	/*
1173 	 * Grab a task reference and drop the proc reference before making any upcalls.
1174 	 */
1175 	task_reference(task_insp);
1176 
1177 	proc_rele(proc);
1178 	proc = PROC_NULL;
1179 
1180 #if CONFIG_MACF
1181 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
1182 	if (error) {
1183 		error = EPERM;
1184 		goto tifpout;
1185 	}
1186 #endif
1187 
1188 	/* If we aren't root and target's task access port is set... */
1189 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1190 	    !is_current_proc &&
1191 	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
1192 	    (tfpport != IPC_PORT_NULL)) {
1193 		if (tfpport == IPC_PORT_DEAD) {
1194 			error = EACCES;
1195 			goto tifpout;
1196 		}
1197 
1198 
1199 		/* Call up to the task access server */
1200 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1201 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
1202 
1203 		if (error != MACH_MSG_SUCCESS) {
1204 			if (error == MACH_RCV_INTERRUPTED) {
1205 				error = EINTR;
1206 			} else {
1207 				error = EPERM;
1208 			}
1209 			goto tifpout;
1210 		}
1211 	}
1212 
1213 	/* Check if the task has been corpsified */
1214 	if (is_corpsetask(task_insp)) {
1215 		error = EACCES;
1216 		goto tifpout;
1217 	}
1218 
1219 	/* could be IP_NULL, consumes a ref */
1220 	sright = (void*) convert_task_inspect_to_port(task_insp);
1221 	task_insp = TASK_INSPECT_NULL;
1222 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1223 
1224 tifpout:
1225 	task_deallocate(t1);
1226 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1227 	if (proc != PROC_NULL) {
1228 		proc_rele(proc);
1229 	}
1230 	if (tfpport != IPC_PORT_NULL) {
1231 		ipc_port_release_send(tfpport);
1232 	}
1233 	if (task_insp != TASK_INSPECT_NULL) {
1234 		task_deallocate(task_insp);
1235 	}
1236 
1237 	*ret = error;
1238 	return error;
1239 }
1240 
1241 /*
1242  *	Routine:	task_read_for_pid
1243  *	Purpose:
1244  *		Get the task read port for another "process", named by its
1245  *		process ID on the same host as "target_task".
1246  */
1247 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1248 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1249 {
1250 	mach_port_name_t        target_tport = args->target_tport;
1251 	int                     pid = args->pid;
1252 	user_addr_t             task_addr = args->t;
1253 
1254 	proc_t                  proc = PROC_NULL;
1255 	task_t                  t1 = TASK_NULL;
1256 	task_read_t             task_read = TASK_READ_NULL;
1257 	mach_port_name_t        tret = MACH_PORT_NULL;
1258 	ipc_port_t              tfpport = MACH_PORT_NULL;
1259 	int                     error = 0;
1260 	void                    *sright = NULL;
1261 	boolean_t               is_current_proc = FALSE;
1262 	struct proc_ident       pident = {0};
1263 
1264 	/* Disallow read port for kernel_task */
1265 	if (pid == 0) {
1266 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1267 		return EPERM;
1268 	}
1269 
1270 	t1 = port_name_to_task(target_tport);
1271 	if (t1 == TASK_NULL) {
1272 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1273 		return EINVAL;
1274 	}
1275 
1276 	proc = proc_find(pid);
1277 	if (proc == PROC_NULL) {
1278 		error = ESRCH;
1279 		goto trfpout;
1280 	}
1281 	pident = proc_ident(proc);
1282 	is_current_proc = (proc == current_proc());
1283 
1284 	if (!(task_for_pid_posix_check(proc))) {
1285 		error = EPERM;
1286 		goto trfpout;
1287 	}
1288 
1289 	task_read = proc->task;
1290 	if (task_read == TASK_INSPECT_NULL) {
1291 		goto trfpout;
1292 	}
1293 
1294 	/*
1295 	 * Grab a task reference and drop the proc reference before making any upcalls.
1296 	 */
1297 	task_reference(task_read);
1298 
1299 	proc_rele(proc);
1300 	proc = PROC_NULL;
1301 
1302 #if CONFIG_MACF
1303 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1304 	if (error) {
1305 		error = EPERM;
1306 		goto trfpout;
1307 	}
1308 #endif
1309 
1310 	/* If we aren't root and target's task access port is set... */
1311 	if (!kauth_cred_issuser(kauth_cred_get()) &&
1312 	    !is_current_proc &&
1313 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
1314 	    (tfpport != IPC_PORT_NULL)) {
1315 		if (tfpport == IPC_PORT_DEAD) {
1316 			error = EACCES;
1317 			goto trfpout;
1318 		}
1319 
1320 
1321 		/* Call up to the task access server */
1322 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1323 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1324 
1325 		if (error != MACH_MSG_SUCCESS) {
1326 			if (error == MACH_RCV_INTERRUPTED) {
1327 				error = EINTR;
1328 			} else {
1329 				error = EPERM;
1330 			}
1331 			goto trfpout;
1332 		}
1333 	}
1334 
1335 	/* Check if the task has been corpsified */
1336 	if (is_corpsetask(task_read)) {
1337 		error = EACCES;
1338 		goto trfpout;
1339 	}
1340 
1341 	/* could be IP_NULL, consumes a ref */
1342 	sright = (void*) convert_task_read_to_port(task_read);
1343 	task_read = TASK_READ_NULL;
1344 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1345 
1346 trfpout:
1347 	task_deallocate(t1);
1348 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1349 	if (proc != PROC_NULL) {
1350 		proc_rele(proc);
1351 	}
1352 	if (tfpport != IPC_PORT_NULL) {
1353 		ipc_port_release_send(tfpport);
1354 	}
1355 	if (task_read != TASK_READ_NULL) {
1356 		task_deallocate(task_read);
1357 	}
1358 
1359 	*ret = error;
1360 	return error;
1361 }
1362 
1363 kern_return_t
pid_suspend(struct proc * p __unused,struct pid_suspend_args * args,int * ret)1364 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1365 {
1366 	task_t  target = NULL;
1367 	proc_t  targetproc = PROC_NULL;
1368 	int     pid = args->pid;
1369 	int     error = 0;
1370 	mach_port_t tfpport = MACH_PORT_NULL;
1371 
1372 	if (pid == 0) {
1373 		error = EPERM;
1374 		goto out;
1375 	}
1376 
1377 	targetproc = proc_find(pid);
1378 	if (targetproc == PROC_NULL) {
1379 		error = ESRCH;
1380 		goto out;
1381 	}
1382 
1383 	if (!task_for_pid_posix_check(targetproc) &&
1384 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1385 		error = EPERM;
1386 		goto out;
1387 	}
1388 
1389 #if CONFIG_MACF
1390 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
1391 	if (error) {
1392 		error = EPERM;
1393 		goto out;
1394 	}
1395 #endif
1396 
1397 	target = targetproc->task;
1398 #if XNU_TARGET_OS_OSX
1399 	if (target != TASK_NULL) {
1400 		/* If we aren't root and target's task access port is set... */
1401 		if (!kauth_cred_issuser(kauth_cred_get()) &&
1402 		    targetproc != current_proc() &&
1403 		    (task_get_task_access_port(target, &tfpport) == 0) &&
1404 		    (tfpport != IPC_PORT_NULL)) {
1405 			if (tfpport == IPC_PORT_DEAD) {
1406 				error = EACCES;
1407 				goto out;
1408 			}
1409 
1410 			/* Call up to the task access server */
1411 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1412 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1413 
1414 			if (error != MACH_MSG_SUCCESS) {
1415 				if (error == MACH_RCV_INTERRUPTED) {
1416 					error = EINTR;
1417 				} else {
1418 					error = EPERM;
1419 				}
1420 				goto out;
1421 			}
1422 		}
1423 	}
1424 #endif /* XNU_TARGET_OS_OSX */
1425 
1426 	task_reference(target);
1427 	error = task_pidsuspend(target);
1428 	if (error) {
1429 		if (error == KERN_INVALID_ARGUMENT) {
1430 			error = EINVAL;
1431 		} else {
1432 			error = EPERM;
1433 		}
1434 	}
1435 #if CONFIG_MEMORYSTATUS
1436 	else {
1437 		memorystatus_on_suspend(targetproc);
1438 	}
1439 #endif
1440 
1441 	task_deallocate(target);
1442 
1443 out:
1444 	if (tfpport != IPC_PORT_NULL) {
1445 		ipc_port_release_send(tfpport);
1446 	}
1447 
1448 	if (targetproc != PROC_NULL) {
1449 		proc_rele(targetproc);
1450 	}
1451 	*ret = error;
1452 	return error;
1453 }
1454 
1455 kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args * args)1456 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1457 {
1458 	mach_port_name_t        target_tport = args->target_tport;
1459 	int                     pid = args->pid;
1460 	user_addr_t             task_addr = args->t;
1461 	proc_t                  p = PROC_NULL;
1462 	task_t                  t1 = TASK_NULL;
1463 	task_t                  task = TASK_NULL;
1464 	mach_port_name_t        tret = MACH_PORT_NULL;
1465 	ipc_port_t              tfpport = MACH_PORT_NULL;
1466 	ipc_port_t              sright = NULL;
1467 	int                     error = 0;
1468 	boolean_t               is_current_proc = FALSE;
1469 	struct proc_ident       pident = {0};
1470 
1471 	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1472 	AUDIT_ARG(pid, pid);
1473 	AUDIT_ARG(mach_port1, target_tport);
1474 
1475 	/* Always check if pid == 0 */
1476 	if (pid == 0) {
1477 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1478 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1479 		return KERN_FAILURE;
1480 	}
1481 
1482 	t1 = port_name_to_task(target_tport);
1483 	if (t1 == TASK_NULL) {
1484 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1485 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1486 		return KERN_FAILURE;
1487 	}
1488 
1489 	p = proc_find(pid);
1490 	if (p == PROC_NULL) {
1491 		error = KERN_FAILURE;
1492 		goto tfpout;
1493 	}
1494 	pident = proc_ident(p);
1495 	is_current_proc = (p == current_proc());
1496 
1497 #if CONFIG_AUDIT
1498 	AUDIT_ARG(process, p);
1499 #endif
1500 
1501 	if (!(task_for_pid_posix_check(p))) {
1502 		error = KERN_FAILURE;
1503 		goto tfpout;
1504 	}
1505 
1506 	if (p->task == TASK_NULL) {
1507 		error = KERN_SUCCESS;
1508 		goto tfpout;
1509 	}
1510 
1511 	/*
1512 	 * Grab a task reference and drop the proc reference before making any upcalls.
1513 	 */
1514 	task = p->task;
1515 	task_reference(task);
1516 
1517 	proc_rele(p);
1518 	p = PROC_NULL;
1519 
1520 	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
1521 #if CONFIG_MACF
1522 		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1523 		if (error) {
1524 			error = KERN_FAILURE;
1525 			goto tfpout;
1526 		}
1527 #endif
1528 
1529 		/* If we aren't root and target's task access port is set... */
1530 		if (!kauth_cred_issuser(kauth_cred_get()) &&
1531 		    !is_current_proc &&
1532 		    (task_get_task_access_port(task, &tfpport) == 0) &&
1533 		    (tfpport != IPC_PORT_NULL)) {
1534 			if (tfpport == IPC_PORT_DEAD) {
1535 				error = KERN_PROTECTION_FAILURE;
1536 				goto tfpout;
1537 			}
1538 
1539 
1540 			/* Call up to the task access server */
1541 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1542 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1543 
1544 			if (error != MACH_MSG_SUCCESS) {
1545 				if (error == MACH_RCV_INTERRUPTED) {
1546 					error = KERN_ABORTED;
1547 				} else {
1548 					error = KERN_FAILURE;
1549 				}
1550 				goto tfpout;
1551 			}
1552 		}
1553 	}
1554 
1555 	/* Check if the task has been corpsified */
1556 	if (is_corpsetask(task)) {
1557 		error = KERN_FAILURE;
1558 		goto tfpout;
1559 	}
1560 
1561 	error = task_get_debug_control_port(task, &sright);
1562 	if (error != KERN_SUCCESS) {
1563 		goto tfpout;
1564 	}
1565 
1566 	tret = ipc_port_copyout_send(
1567 		sright,
1568 		get_task_ipcspace(current_task()));
1569 
1570 	error = KERN_SUCCESS;
1571 
1572 tfpout:
1573 	task_deallocate(t1);
1574 	AUDIT_ARG(mach_port2, tret);
1575 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1576 
1577 	if (tfpport != IPC_PORT_NULL) {
1578 		ipc_port_release_send(tfpport);
1579 	}
1580 	if (task != TASK_NULL) {
1581 		task_deallocate(task);
1582 	}
1583 	if (p != PROC_NULL) {
1584 		proc_rele(p);
1585 	}
1586 	AUDIT_MACH_SYSCALL_EXIT(error);
1587 	return error;
1588 }
1589 
1590 kern_return_t
pid_resume(struct proc * p __unused,struct pid_resume_args * args,int * ret)1591 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1592 {
1593 	task_t  target = NULL;
1594 	proc_t  targetproc = PROC_NULL;
1595 	int     pid = args->pid;
1596 	int     error = 0;
1597 	mach_port_t tfpport = MACH_PORT_NULL;
1598 
1599 	if (pid == 0) {
1600 		error = EPERM;
1601 		goto out;
1602 	}
1603 
1604 	targetproc = proc_find(pid);
1605 	if (targetproc == PROC_NULL) {
1606 		error = ESRCH;
1607 		goto out;
1608 	}
1609 
1610 	if (!task_for_pid_posix_check(targetproc) &&
1611 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1612 		error = EPERM;
1613 		goto out;
1614 	}
1615 
1616 #if CONFIG_MACF
1617 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
1618 	if (error) {
1619 		error = EPERM;
1620 		goto out;
1621 	}
1622 #endif
1623 
1624 	target = targetproc->task;
1625 #if XNU_TARGET_OS_OSX
1626 	if (target != TASK_NULL) {
1627 		/* If we aren't root and target's task access port is set... */
1628 		if (!kauth_cred_issuser(kauth_cred_get()) &&
1629 		    targetproc != current_proc() &&
1630 		    (task_get_task_access_port(target, &tfpport) == 0) &&
1631 		    (tfpport != IPC_PORT_NULL)) {
1632 			if (tfpport == IPC_PORT_DEAD) {
1633 				error = EACCES;
1634 				goto out;
1635 			}
1636 
1637 			/* Call up to the task access server */
1638 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1639 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1640 
1641 			if (error != MACH_MSG_SUCCESS) {
1642 				if (error == MACH_RCV_INTERRUPTED) {
1643 					error = EINTR;
1644 				} else {
1645 					error = EPERM;
1646 				}
1647 				goto out;
1648 			}
1649 		}
1650 	}
1651 #endif /* XNU_TARGET_OS_OSX */
1652 
1653 #if !XNU_TARGET_OS_OSX
1654 #if SOCKETS
1655 	resume_proc_sockets(targetproc);
1656 #endif /* SOCKETS */
1657 #endif /* !XNU_TARGET_OS_OSX */
1658 
1659 	task_reference(target);
1660 
1661 #if CONFIG_MEMORYSTATUS
1662 	memorystatus_on_resume(targetproc);
1663 #endif
1664 
1665 	error = task_pidresume(target);
1666 	if (error) {
1667 		if (error == KERN_INVALID_ARGUMENT) {
1668 			error = EINVAL;
1669 		} else {
1670 			if (error == KERN_MEMORY_ERROR) {
1671 				psignal(targetproc, SIGKILL);
1672 				error = EIO;
1673 			} else {
1674 				error = EPERM;
1675 			}
1676 		}
1677 	}
1678 
1679 	task_deallocate(target);
1680 
1681 out:
1682 	if (tfpport != IPC_PORT_NULL) {
1683 		ipc_port_release_send(tfpport);
1684 	}
1685 
1686 	if (targetproc != PROC_NULL) {
1687 		proc_rele(targetproc);
1688 	}
1689 
1690 	*ret = error;
1691 	return error;
1692 }
1693 
1694 #if !XNU_TARGET_OS_OSX
1695 /*
1696  * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1697  * When a process is specified, this call is blocking, otherwise we wake up the
1698  * freezer thread and do not block on a process being frozen.
1699  */
1700 kern_return_t
pid_hibernate(struct proc * p __unused,struct pid_hibernate_args * args,int * ret)1701 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1702 {
1703 	int     error = 0;
1704 	proc_t  targetproc = PROC_NULL;
1705 	int     pid = args->pid;
1706 
1707 #ifndef CONFIG_FREEZE
1708 	#pragma unused(pid)
1709 #else
1710 
1711 	/*
1712 	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1713 	 */
1714 
1715 	if (pid >= 0) {
1716 		targetproc = proc_find(pid);
1717 
1718 		if (targetproc == PROC_NULL) {
1719 			error = ESRCH;
1720 			goto out;
1721 		}
1722 
1723 		if (!task_for_pid_posix_check(targetproc)) {
1724 			error = EPERM;
1725 			goto out;
1726 		}
1727 	}
1728 
1729 #if CONFIG_MACF
1730 	//Note that targetproc may be null
1731 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
1732 	if (error) {
1733 		error = EPERM;
1734 		goto out;
1735 	}
1736 #endif
1737 
1738 	if (pid == -2) {
1739 		vm_pageout_anonymous_pages();
1740 	} else if (pid == -1) {
1741 		memorystatus_on_inactivity(targetproc);
1742 	} else {
1743 		error = memorystatus_freeze_process_sync(targetproc);
1744 	}
1745 
1746 out:
1747 
1748 #endif /* CONFIG_FREEZE */
1749 
1750 	if (targetproc != PROC_NULL) {
1751 		proc_rele(targetproc);
1752 	}
1753 	*ret = error;
1754 	return error;
1755 }
1756 #endif /* !XNU_TARGET_OS_OSX */
1757 
1758 #if SOCKETS
1759 int
networking_memstatus_callout(proc_t p,uint32_t status)1760 networking_memstatus_callout(proc_t p, uint32_t status)
1761 {
1762 	struct fileproc *fp;
1763 
1764 	/*
1765 	 * proc list lock NOT held
1766 	 * proc lock NOT held
1767 	 * a reference on the proc has been held / shall be dropped by the caller.
1768 	 */
1769 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1770 	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1771 
1772 	proc_fdlock(p);
1773 
1774 	fdt_foreach(fp, p) {
1775 		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1776 #if NECP
1777 		case DTYPE_NETPOLICY:
1778 			necp_fd_memstatus(p, status,
1779 			    (struct necp_fd_data *)fp_get_data(fp));
1780 			break;
1781 #endif /* NECP */
1782 #if SKYWALK
1783 		case DTYPE_CHANNEL:
1784 			kern_channel_memstatus(p, status,
1785 			    (struct kern_channel *)fp_get_data(fp));
1786 			break;
1787 #endif /* SKYWALK */
1788 		default:
1789 			break;
1790 		}
1791 	}
1792 	proc_fdunlock(p);
1793 
1794 	return 1;
1795 }
1796 
1797 #if SKYWALK
1798 /*
1799  * Since we make multiple passes across the fileproc array, record the
1800  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases.  If we find more,
1802  * we'll go to the slow path during second pass.
1803  */
1804 #define MAX_CHANNELS    8       /* should be more than enough */
1805 #endif /* SKYWALK */
1806 
/*
 * networking_defunct_callout:
 *
 * Per-process callback (passed to proc_iterate() by pid_shutdown_sockets()).
 * "arg" is the struct pid_shutdown_sockets_args of the request; its pid and
 * level select what gets defuncted.
 *
 * With the fd lock of "p" held, the file table is walked and:
 *   - sockets are defuncted when "p" is the target pid, or the socket's
 *     last_pid is the target, or the socket is delegated (SOF_DELEGATED)
 *     with e_pid equal to the target;
 *   - NECP descriptors are defuncted when "p" is the target pid;
 *   - Skywalk channels of the target pid are collected and defuncted in a
 *     second step, after the NECP descriptors.
 *
 * Always returns PROC_RETURNED.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	/* counts every matching channel, even those beyond MAX_CHANNELS */
	int channel_count = 0;
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* match on owner pid, last user pid, or delegated effective pid */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				/* remember only the first MAX_CHANNELS handles */
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP).  Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* slow path: more channels than the array could record */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1891 
1892 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1893 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1894 {
1895 	int                             error = 0;
1896 	proc_t                          targetproc = PROC_NULL;
1897 	int                             pid = args->pid;
1898 	int                             level = args->level;
1899 
1900 	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1901 	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1902 		error = EINVAL;
1903 		goto out;
1904 	}
1905 
1906 	targetproc = proc_find(pid);
1907 	if (targetproc == PROC_NULL) {
1908 		error = ESRCH;
1909 		goto out;
1910 	}
1911 
1912 	if (!task_for_pid_posix_check(targetproc) &&
1913 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1914 		error = EPERM;
1915 		goto out;
1916 	}
1917 
1918 #if CONFIG_MACF
1919 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1920 	if (error) {
1921 		error = EPERM;
1922 		goto out;
1923 	}
1924 #endif
1925 
1926 	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1927 	    networking_defunct_callout, args, NULL, NULL);
1928 
1929 out:
1930 	if (targetproc != PROC_NULL) {
1931 		proc_rele(targetproc);
1932 	}
1933 	*ret = error;
1934 	return error;
1935 }
1936 
1937 #endif /* SOCKETS */
1938 
1939 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1940 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1941     __unused int arg2, struct sysctl_req *req)
1942 {
1943 	int error = 0;
1944 	int new_value;
1945 
1946 	error = SYSCTL_OUT(req, arg1, sizeof(int));
1947 	if (error || req->newptr == USER_ADDR_NULL) {
1948 		return error;
1949 	}
1950 
1951 	if (!kauth_cred_issuser(kauth_cred_get())) {
1952 		return EPERM;
1953 	}
1954 
1955 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1956 		goto out;
1957 	}
1958 	if ((new_value == KERN_TFP_POLICY_DENY)
1959 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
1960 		tfp_policy = new_value;
1961 	} else {
1962 		error = EINVAL;
1963 	}
1964 out:
1965 	return error;
1966 }
1967 
/*
 * Build-time flag exported through sysctl: 1 when the kernel was compiled
 * with SECURE_KERNEL defined, 0 otherwise.
 */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

/* Read-only (CTLFLAG_RD) view of kern_secure_kernel under the _kern parent. */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/*
 * "tfp" node under _kern and its "policy" leaf.  The leaf is backed by
 * tfp_policy and served by sysctl_settfp_policy().
 */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/*
 * Shared region variables under the _vm parent: trace level and
 * persistence are read-write, version is read-only.
 */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
1986 
1987 /*
1988  * shared_region_check_np:
1989  *
1990  * This system call is intended for dyld.
1991  *
1992  * dyld calls this when any process starts to see if the process's shared
1993  * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
1996  * dyld will then check what's mapped at that address.
1997  *
1998  * If the shared region is empty, dyld will then attempt to map the shared
1999  * cache file in the shared region via the shared_region_map_np() system call.
2000  *
2001  * If something's already mapped in the shared region, dyld will check if it
2002  * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2004  * shared region.
2005  * If it doesn't match, dyld will unmap the shared region and map the shared
2006  * cache into the process's address space via mmap().
2007  *
2008  * A NULL pointer argument can be used by dyld to indicate it has unmapped
2009  * the shared region. We will remove the shared_region reference from the task.
2010  *
2011  * ERROR VALUES
2012  * EINVAL	no shared region
2013  * ENOMEM	shared region is empty
2014  * EFAULT	bad address for "start_address"
2015  */
2016 int
shared_region_check_np(__unused struct proc * p,struct shared_region_check_np_args * uap,__unused int * retvalp)2017 shared_region_check_np(
2018 	__unused struct proc                    *p,
2019 	struct shared_region_check_np_args      *uap,
2020 	__unused int                            *retvalp)
2021 {
2022 	vm_shared_region_t      shared_region;
2023 	mach_vm_offset_t        start_address = 0;
2024 	int                     error = 0;
2025 	kern_return_t           kr;
2026 	task_t                  task = current_task();
2027 
2028 	SHARED_REGION_TRACE_DEBUG(
2029 		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
2030 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2031 		proc_getpid(p), p->p_comm,
2032 		(uint64_t)uap->start_address));
2033 
2034 	/* retrieve the current tasks's shared region */
2035 	shared_region = vm_shared_region_get(task);
2036 	if (shared_region != NULL) {
2037 		/*
2038 		 * A NULL argument is used by dyld to indicate the task
2039 		 * has unmapped its shared region.
2040 		 */
2041 		if (uap->start_address == 0) {
2042 			vm_shared_region_set(task, NULL);
2043 		} else {
2044 			/* retrieve address of its first mapping... */
2045 			kr = vm_shared_region_start_address(shared_region, &start_address, task);
2046 			if (kr != KERN_SUCCESS) {
2047 				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2048 				    "check_np(0x%llx) "
2049 				    "vm_shared_region_start_address() failed\n",
2050 				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2051 				    proc_getpid(p), p->p_comm,
2052 				    (uint64_t)uap->start_address));
2053 				error = ENOMEM;
2054 			} else {
2055 #if __has_feature(ptrauth_calls)
2056 				/*
2057 				 * Remap any section of the shared library that
2058 				 * has authenticated pointers into private memory.
2059 				 */
2060 				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
2061 					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2062 					    "check_np(0x%llx) "
2063 					    "vm_shared_region_auth_remap() failed\n",
2064 					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2065 					    proc_getpid(p), p->p_comm,
2066 					    (uint64_t)uap->start_address));
2067 					error = ENOMEM;
2068 				}
2069 #endif /* __has_feature(ptrauth_calls) */
2070 
2071 				/* ... and give it to the caller */
2072 				if (error == 0) {
2073 					error = copyout(&start_address,
2074 					    (user_addr_t) uap->start_address,
2075 					    sizeof(start_address));
2076 					if (error != 0) {
2077 						SHARED_REGION_TRACE_ERROR(
2078 							("shared_region: %p [%d(%s)] "
2079 							"check_np(0x%llx) "
2080 							"copyout(0x%llx) error %d\n",
2081 							(void *)VM_KERNEL_ADDRPERM(current_thread()),
2082 							proc_getpid(p), p->p_comm,
2083 							(uint64_t)uap->start_address, (uint64_t)start_address,
2084 							error));
2085 					}
2086 				}
2087 			}
2088 		}
2089 		vm_shared_region_deallocate(shared_region);
2090 	} else {
2091 		/* no shared region ! */
2092 		error = EINVAL;
2093 	}
2094 
2095 	SHARED_REGION_TRACE_DEBUG(
2096 		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
2097 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2098 		proc_getpid(p), p->p_comm,
2099 		(uint64_t)uap->start_address, (uint64_t)start_address, error));
2100 
2101 	return error;
2102 }
2103 
2104 
2105 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2106 shared_region_copyin(
2107 	struct proc  *p,
2108 	user_addr_t  user_addr,
2109 	unsigned int count,
2110 	unsigned int element_size,
2111 	void         *kernel_data)
2112 {
2113 	int             error = 0;
2114 	vm_size_t       size = count * element_size;
2115 
2116 	error = copyin(user_addr, kernel_data, size);
2117 	if (error) {
2118 		SHARED_REGION_TRACE_ERROR(
2119 			("shared_region: %p [%d(%s)] map(): "
2120 			"copyin(0x%llx, %ld) failed (error=%d)\n",
2121 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2122 			proc_getpid(p), p->p_comm,
2123 			(uint64_t)user_addr, (long)size, error));
2124 	}
2125 	return error;
2126 }
2127 
2128 /*
2129  * A reasonable upper limit to prevent overflow of allocation/copyin.
2130  */
2131 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2132 
2133 /* forward declaration */
2134 __attribute__((noinline))
2135 static void shared_region_map_and_slide_cleanup(
2136 	struct proc              *p,
2137 	uint32_t                 files_count,
2138 	struct _sr_file_mappings *sr_file_mappings,
2139 	struct vm_shared_region  *shared_region,
2140 	struct vnode             *scdir_vp);
2141 
2142 /*
2143  * Setup part of _shared_region_map_and_slide().
2144  * It had to be broken out of _shared_region_map_and_slide() to
2145  * prevent compiler inlining from blowing out the stack.
2146  */
2147 __attribute__((noinline))
2148 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode ** scdir_vp,struct vnode * rdir_vp)2149 shared_region_map_and_slide_setup(
2150 	struct proc                         *p,
2151 	uint32_t                            files_count,
2152 	struct shared_file_np               *files,
2153 	uint32_t                            mappings_count,
2154 	struct shared_file_mapping_slide_np *mappings,
2155 	struct _sr_file_mappings            **sr_file_mappings,
2156 	struct vm_shared_region             **shared_region_ptr,
2157 	struct vnode                        **scdir_vp,
2158 	struct vnode                        *rdir_vp)
2159 {
2160 	int                             error = 0;
2161 	struct _sr_file_mappings        *srfmp;
2162 	uint32_t                        mappings_next;
2163 	struct vnode_attr               va;
2164 	off_t                           fs;
2165 #if CONFIG_MACF
2166 	vm_prot_t                       maxprot = VM_PROT_ALL;
2167 #endif
2168 	uint32_t                        i;
2169 	struct vm_shared_region         *shared_region = NULL;
2170 	boolean_t                       is_driverkit = task_is_driver(current_task());
2171 	const char                      *expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2172 
2173 	SHARED_REGION_TRACE_DEBUG(
2174 		("shared_region: %p [%d(%s)] -> map\n",
2175 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2176 		proc_getpid(p), p->p_comm));
2177 
2178 	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2179 		error = E2BIG;
2180 		goto done;
2181 	}
2182 	if (files_count == 0) {
2183 		error = EINVAL;
2184 		goto done;
2185 	}
2186 	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2187 	    Z_WAITOK | Z_ZERO);
2188 	if (*sr_file_mappings == NULL) {
2189 		error = ENOMEM;
2190 		goto done;
2191 	}
2192 	mappings_next = 0;
2193 	for (i = 0; i < files_count; i++) {
2194 		srfmp = &(*sr_file_mappings)[i];
2195 		srfmp->fd = files[i].sf_fd;
2196 		srfmp->mappings_count = files[i].sf_mappings_count;
2197 		srfmp->mappings = &mappings[mappings_next];
2198 		mappings_next += srfmp->mappings_count;
2199 		if (mappings_next > mappings_count) {
2200 			error = EINVAL;
2201 			goto done;
2202 		}
2203 		srfmp->slide = files[i].sf_slide;
2204 	}
2205 
2206 	if (scdir_enforce) {
2207 		/* get vnode for expected_scdir_path */
2208 		error = vnode_lookup(expected_scdir_path, 0, scdir_vp, vfs_context_current());
2209 		if (error) {
2210 			SHARED_REGION_TRACE_ERROR(
2211 				("shared_region: %p [%d(%s)]: "
2212 				"vnode_lookup(%s) failed (error=%d)\n",
2213 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2214 				proc_getpid(p), p->p_comm,
2215 				expected_scdir_path, error));
2216 			goto done;
2217 		}
2218 	}
2219 
2220 	/* get the process's shared region (setup in vm_map_exec()) */
2221 	shared_region = vm_shared_region_trim_and_get(current_task());
2222 	*shared_region_ptr = shared_region;
2223 	if (shared_region == NULL) {
2224 		SHARED_REGION_TRACE_ERROR(
2225 			("shared_region: %p [%d(%s)] map(): "
2226 			"no shared region\n",
2227 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2228 			proc_getpid(p), p->p_comm));
2229 		error = EINVAL;
2230 		goto done;
2231 	}
2232 
2233 	/*
2234 	 * Check the shared region matches the current root
2235 	 * directory of this process.  Deny the mapping to
2236 	 * avoid tainting the shared region with something that
2237 	 * doesn't quite belong into it.
2238 	 */
2239 	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2240 	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2241 		SHARED_REGION_TRACE_ERROR(
2242 			("shared_region: map(%p) root_dir mismatch\n",
2243 			(void *)VM_KERNEL_ADDRPERM(current_thread())));
2244 		error = EPERM;
2245 		goto done;
2246 	}
2247 
2248 
2249 	for (srfmp = &(*sr_file_mappings)[0];
2250 	    srfmp < &(*sr_file_mappings)[files_count];
2251 	    srfmp++) {
2252 		if (srfmp->mappings_count == 0) {
2253 			/* no mappings here... */
2254 			continue;
2255 		}
2256 
2257 		/* get file structure from file descriptor */
2258 		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2259 		if (error) {
2260 			SHARED_REGION_TRACE_ERROR(
2261 				("shared_region: %p [%d(%s)] map: "
2262 				"fd=%d lookup failed (error=%d)\n",
2263 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2264 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2265 			goto done;
2266 		}
2267 
2268 		/* we need at least read permission on the file */
2269 		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2270 			SHARED_REGION_TRACE_ERROR(
2271 				("shared_region: %p [%d(%s)] map: "
2272 				"fd=%d not readable\n",
2273 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2274 				proc_getpid(p), p->p_comm, srfmp->fd));
2275 			error = EPERM;
2276 			goto done;
2277 		}
2278 
2279 		/* get vnode from file structure */
2280 		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2281 		if (error) {
2282 			SHARED_REGION_TRACE_ERROR(
2283 				("shared_region: %p [%d(%s)] map: "
2284 				"fd=%d getwithref failed (error=%d)\n",
2285 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2286 				proc_getpid(p), p->p_comm, srfmp->fd, error));
2287 			goto done;
2288 		}
2289 		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2290 
2291 		/* make sure the vnode is a regular file */
2292 		if (srfmp->vp->v_type != VREG) {
2293 			SHARED_REGION_TRACE_ERROR(
2294 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2295 				"not a file (type=%d)\n",
2296 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2297 				proc_getpid(p), p->p_comm,
2298 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2299 				srfmp->vp->v_name, srfmp->vp->v_type));
2300 			error = EINVAL;
2301 			goto done;
2302 		}
2303 
2304 #if CONFIG_MACF
2305 		/* pass in 0 for the offset argument because AMFI does not need the offset
2306 		 *       of the shared cache */
2307 		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2308 		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
2309 		if (error) {
2310 			goto done;
2311 		}
2312 #endif /* MAC */
2313 
2314 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2315 		/*
2316 		 * Check if the shared cache is in the trust cache;
2317 		 * if so, we can skip the root ownership check.
2318 		 */
2319 #if DEVELOPMENT || DEBUG
2320 		/*
2321 		 * Skip both root ownership and trust cache check if
2322 		 * enforcement is disabled.
2323 		 */
2324 		if (!cs_system_enforcement()) {
2325 			goto after_root_check;
2326 		}
2327 #endif /* DEVELOPMENT || DEBUG */
2328 		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2329 		if (blob == NULL) {
2330 			SHARED_REGION_TRACE_ERROR(
2331 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2332 				"missing CS blob\n",
2333 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2334 				proc_getpid(p), p->p_comm,
2335 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2336 				srfmp->vp->v_name));
2337 			goto root_check;
2338 		}
2339 		const uint8_t *cdhash = csblob_get_cdhash(blob);
2340 		if (cdhash == NULL) {
2341 			SHARED_REGION_TRACE_ERROR(
2342 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2343 				"missing cdhash\n",
2344 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2345 				proc_getpid(p), p->p_comm,
2346 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2347 				srfmp->vp->v_name));
2348 			goto root_check;
2349 		}
2350 		uint32_t result = pmap_lookup_in_static_trust_cache(cdhash);
2351 		boolean_t in_trust_cache = result & (TC_LOOKUP_FOUND << TC_LOOKUP_RESULT_SHIFT);
2352 		if (!in_trust_cache) {
2353 			SHARED_REGION_TRACE_ERROR(
2354 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2355 				"not in trust cache\n",
2356 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2357 				proc_getpid(p), p->p_comm,
2358 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2359 				srfmp->vp->v_name));
2360 			goto root_check;
2361 		}
2362 		goto after_root_check;
2363 root_check:
2364 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2365 
2366 		/* The shared cache file must be owned by root */
2367 		VATTR_INIT(&va);
2368 		VATTR_WANTED(&va, va_uid);
2369 		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2370 		if (error) {
2371 			SHARED_REGION_TRACE_ERROR(
2372 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2373 				"vnode_getattr(%p) failed (error=%d)\n",
2374 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2375 				proc_getpid(p), p->p_comm,
2376 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2377 				srfmp->vp->v_name,
2378 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2379 				error));
2380 			goto done;
2381 		}
2382 		if (va.va_uid != 0) {
2383 			SHARED_REGION_TRACE_ERROR(
2384 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2385 				"owned by uid=%d instead of 0\n",
2386 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2387 				proc_getpid(p), p->p_comm,
2388 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2389 				srfmp->vp->v_name, va.va_uid));
2390 			error = EPERM;
2391 			goto done;
2392 		}
2393 
2394 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2395 after_root_check:
2396 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2397 
2398 #if CONFIG_CSR
2399 		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2400 			VATTR_INIT(&va);
2401 			VATTR_WANTED(&va, va_flags);
2402 			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2403 			if (error) {
2404 				SHARED_REGION_TRACE_ERROR(
2405 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2406 					"vnode_getattr(%p) failed (error=%d)\n",
2407 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2408 					proc_getpid(p), p->p_comm,
2409 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2410 					srfmp->vp->v_name,
2411 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2412 					error));
2413 				goto done;
2414 			}
2415 
2416 			if (!(va.va_flags & SF_RESTRICTED)) {
2417 				/*
2418 				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2419 				 * the shared cache file is NOT SIP-protected, so reject the
2420 				 * mapping request
2421 				 */
2422 				SHARED_REGION_TRACE_ERROR(
2423 					("shared_region: %p [%d(%s)] map(%p:'%s'), "
2424 					"vnode is not SIP-protected. \n",
2425 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2426 					proc_getpid(p), p->p_comm,
2427 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2428 					srfmp->vp->v_name));
2429 				error = EPERM;
2430 				goto done;
2431 			}
2432 		}
2433 #else /* CONFIG_CSR */
2434 		/* Devices without SIP/ROSP need to make sure that the shared cache is on the root volume. */
2435 
2436 		assert(rdir_vp != NULL);
2437 		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2438 			SHARED_REGION_TRACE_ERROR(
2439 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2440 				"not on process's root volume\n",
2441 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2442 				proc_getpid(p), p->p_comm,
2443 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2444 				srfmp->vp->v_name));
2445 			error = EPERM;
2446 			goto done;
2447 		}
2448 #endif /* CONFIG_CSR */
2449 
2450 		if (scdir_enforce) {
2451 			/* ensure parent is scdir_vp */
2452 			assert(*scdir_vp != NULL);
2453 			if (vnode_parent(srfmp->vp) != *scdir_vp) {
2454 				SHARED_REGION_TRACE_ERROR(
2455 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2456 					"shared cache file not in %s\n",
2457 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2458 					proc_getpid(p), p->p_comm,
2459 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2460 					srfmp->vp->v_name, expected_scdir_path));
2461 				error = EPERM;
2462 				goto done;
2463 			}
2464 		}
2465 
2466 		/* get vnode size */
2467 		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2468 		if (error) {
2469 			SHARED_REGION_TRACE_ERROR(
2470 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2471 				"vnode_size(%p) failed (error=%d)\n",
2472 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2473 				proc_getpid(p), p->p_comm,
2474 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2475 				srfmp->vp->v_name,
2476 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2477 			goto done;
2478 		}
2479 		srfmp->file_size = fs;
2480 
2481 		/* get the file's memory object handle */
2482 		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2483 		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2484 			SHARED_REGION_TRACE_ERROR(
2485 				("shared_region: %p [%d(%s)] map(%p:'%s'): "
2486 				"no memory object\n",
2487 				(void *)VM_KERNEL_ADDRPERM(current_thread()),
2488 				proc_getpid(p), p->p_comm,
2489 				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2490 				srfmp->vp->v_name));
2491 			error = EINVAL;
2492 			goto done;
2493 		}
2494 
2495 		/* check that the mappings are properly covered by code signatures */
2496 		if (!cs_system_enforcement()) {
2497 			/* code signing is not enforced: no need to check */
2498 		} else {
2499 			for (i = 0; i < srfmp->mappings_count; i++) {
2500 				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2501 					/* zero-filled mapping: not backed by the file */
2502 					continue;
2503 				}
2504 				if (ubc_cs_is_range_codesigned(srfmp->vp,
2505 				    srfmp->mappings[i].sms_file_offset,
2506 				    srfmp->mappings[i].sms_size)) {
2507 					/* this mapping is fully covered by code signatures */
2508 					continue;
2509 				}
2510 				SHARED_REGION_TRACE_ERROR(
2511 					("shared_region: %p [%d(%s)] map(%p:'%s'): "
2512 					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2513 					"is not code-signed\n",
2514 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2515 					proc_getpid(p), p->p_comm,
2516 					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2517 					srfmp->vp->v_name,
2518 					i, srfmp->mappings_count,
2519 					srfmp->mappings[i].sms_address,
2520 					srfmp->mappings[i].sms_size,
2521 					srfmp->mappings[i].sms_file_offset,
2522 					srfmp->mappings[i].sms_max_prot,
2523 					srfmp->mappings[i].sms_init_prot));
2524 				error = EINVAL;
2525 				goto done;
2526 			}
2527 		}
2528 	}
2529 done:
2530 	if (error != 0) {
2531 		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region, *scdir_vp);
2532 		*sr_file_mappings = NULL;
2533 		*shared_region_ptr = NULL;
2534 		*scdir_vp = NULL;
2535 	}
2536 	return error;
2537 }
2538 
2539 /*
2540  * shared_region_map_np()
2541  *
2542  * This system call is intended for dyld.
2543  *
2544  * dyld uses this to map a shared cache file into a shared region.
2545  * This is usually done only the first time a shared cache is needed.
2546  * Subsequent processes will just use the populated shared region without
2547  * requiring any further setup.
2548  */
2549 static int
_shared_region_map_and_slide(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings)2550 _shared_region_map_and_slide(
2551 	struct proc                         *p,
2552 	uint32_t                            files_count,
2553 	struct shared_file_np               *files,
2554 	uint32_t                            mappings_count,
2555 	struct shared_file_mapping_slide_np *mappings)
2556 {
2557 	int                             error = 0;
2558 	kern_return_t                   kr = KERN_SUCCESS;
2559 	struct _sr_file_mappings        *sr_file_mappings = NULL;
2560 	struct vnode                    *scdir_vp = NULL;
2561 	struct vnode                    *rdir_vp = NULL;
2562 	struct vm_shared_region         *shared_region = NULL;
2563 
2564 	/*
2565 	 * Get a reference to the current proc's root dir.
2566 	 * Need this to prevent racing with chroot.
2567 	 */
2568 	proc_fdlock(p);
2569 	rdir_vp = p->p_fd.fd_rdir;
2570 	if (rdir_vp == NULL) {
2571 		rdir_vp = rootvnode;
2572 	}
2573 	assert(rdir_vp != NULL);
2574 	vnode_get(rdir_vp);
2575 	proc_fdunlock(p);
2576 
2577 	/*
2578 	 * Turn files, mappings into sr_file_mappings and other setup.
2579 	 */
2580 	error = shared_region_map_and_slide_setup(p, files_count,
2581 	    files, mappings_count, mappings,
2582 	    &sr_file_mappings, &shared_region, &scdir_vp, rdir_vp);
2583 	if (error != 0) {
2584 		vnode_put(rdir_vp);
2585 		return error;
2586 	}
2587 
2588 	/* map the file(s) into that shared region's submap */
2589 	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2590 	if (kr != KERN_SUCCESS) {
2591 		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2592 		    "vm_shared_region_map_file() failed kr=0x%x\n",
2593 		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
2594 		    proc_getpid(p), p->p_comm, kr));
2595 	}
2596 
2597 	/* convert kern_return_t to errno */
2598 	switch (kr) {
2599 	case KERN_SUCCESS:
2600 		error = 0;
2601 		break;
2602 	case KERN_INVALID_ADDRESS:
2603 		error = EFAULT;
2604 		break;
2605 	case KERN_PROTECTION_FAILURE:
2606 		error = EPERM;
2607 		break;
2608 	case KERN_NO_SPACE:
2609 		error = ENOMEM;
2610 		break;
2611 	case KERN_FAILURE:
2612 	case KERN_INVALID_ARGUMENT:
2613 	default:
2614 		error = EINVAL;
2615 		break;
2616 	}
2617 
2618 	/*
2619 	 * Mark that this process is now using split libraries.
2620 	 */
2621 	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2622 		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2623 	}
2624 
2625 	vnode_put(rdir_vp);
2626 	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region, scdir_vp);
2627 
2628 	SHARED_REGION_TRACE_DEBUG(
2629 		("shared_region: %p [%d(%s)] <- map\n",
2630 		(void *)VM_KERNEL_ADDRPERM(current_thread()),
2631 		proc_getpid(p), p->p_comm));
2632 
2633 	return error;
2634 }
2635 
2636 /*
2637  * Clean up part of _shared_region_map_and_slide()
2638  * It had to be broken out of _shared_region_map_and_slide() to
2639  * prevent compiler inlining from blowing out the stack.
2640  */
2641 __attribute__((noinline))
2642 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region,struct vnode * scdir_vp)2643 shared_region_map_and_slide_cleanup(
2644 	struct proc              *p,
2645 	uint32_t                 files_count,
2646 	struct _sr_file_mappings *sr_file_mappings,
2647 	struct vm_shared_region  *shared_region,
2648 	struct vnode             *scdir_vp)
2649 {
2650 	struct _sr_file_mappings *srfmp;
2651 	struct vnode_attr        va;
2652 
2653 	if (sr_file_mappings != NULL) {
2654 		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2655 			if (srfmp->vp != NULL) {
2656 				vnode_lock_spin(srfmp->vp);
2657 				srfmp->vp->v_flag |= VSHARED_DYLD;
2658 				vnode_unlock(srfmp->vp);
2659 
2660 				/* update the vnode's access time */
2661 				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2662 					VATTR_INIT(&va);
2663 					nanotime(&va.va_access_time);
2664 					VATTR_SET_ACTIVE(&va, va_access_time);
2665 					vnode_setattr(srfmp->vp, &va, vfs_context_current());
2666 				}
2667 
2668 #if NAMEDSTREAMS
2669 				/*
2670 				 * If the shared cache is compressed, it may
2671 				 * have a namedstream vnode instantiated for
2672 				 * for it. That namedstream vnode will also
2673 				 * have to be marked with VSHARED_DYLD.
2674 				 */
2675 				if (vnode_hasnamedstreams(srfmp->vp)) {
2676 					vnode_t svp;
2677 					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2678 					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
2679 						vnode_lock_spin(svp);
2680 						svp->v_flag |= VSHARED_DYLD;
2681 						vnode_unlock(svp);
2682 						vnode_put(svp);
2683 					}
2684 				}
2685 #endif /* NAMEDSTREAMS */
2686 				/*
2687 				 * release the vnode...
2688 				 * ubc_map() still holds it for us in the non-error case
2689 				 */
2690 				(void) vnode_put(srfmp->vp);
2691 				srfmp->vp = NULL;
2692 			}
2693 			if (srfmp->fp != NULL) {
2694 				/* release the file descriptor */
2695 				fp_drop(p, srfmp->fd, srfmp->fp, 0);
2696 				srfmp->fp = NULL;
2697 			}
2698 		}
2699 		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2700 	}
2701 
2702 	if (scdir_vp != NULL) {
2703 		(void)vnode_put(scdir_vp);
2704 		scdir_vp = NULL;
2705 	}
2706 
2707 	if (shared_region != NULL) {
2708 		vm_shared_region_deallocate(shared_region);
2709 	}
2710 }
2711 
2712 
2713 /*
2714  * For each file mapped, we may have mappings for:
2715  *    TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2716  * so let's round up to 8 mappings per file.
2717  */
2718 #define SFM_MAX       (_SR_FILE_MAPPINGS_MAX_FILES * 8)     /* max mapping structs allowed to pass in */
2719 
2720 /*
2721  * This is the older interface that dyld uses to map in the shared
2722  * library. dyld is slowly moving to the new shared_region_map_and_slide_2_np()
2723  * call as needed.
2724  */
2725 int
shared_region_map_and_slide_np(struct proc * p,struct shared_region_map_and_slide_np_args * uap,__unused int * retvalp)2726 shared_region_map_and_slide_np(
2727 	struct proc                                *p,
2728 	struct shared_region_map_and_slide_np_args *uap,
2729 	__unused int                               *retvalp)
2730 {
2731 	unsigned int                        mappings_count = uap->count;
2732 	unsigned int                        m;
2733 	uint32_t                            slide = uap->slide;
2734 	struct shared_file_np               shared_files[1];
2735 	struct shared_file_mapping_np       legacy_mapping;
2736 	struct shared_file_mapping_slide_np *mappings = NULL;
2737 	kern_return_t                       kr = KERN_SUCCESS;
2738 
2739 	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
2740 		if (kr == KERN_INVALID_ARGUMENT) {
2741 			/*
2742 			 * This will happen if we request sliding again
2743 			 * with the same slide value that was used earlier
2744 			 * for the very first sliding.
2745 			 */
2746 			kr = KERN_SUCCESS;
2747 		}
2748 		goto done;
2749 	}
2750 
2751 	if (mappings_count == 0) {
2752 		SHARED_REGION_TRACE_INFO(
2753 			("shared_region: %p [%d(%s)] map(): "
2754 			"no mappings\n",
2755 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2756 			proc_getpid(p), p->p_comm));
2757 		kr = 0; /* no mappings: we're done ! */
2758 		goto done;
2759 	} else if (mappings_count <= SFM_MAX) {
2760 		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
2761 		if (mappings == NULL) {
2762 			kr = KERN_RESOURCE_SHORTAGE;
2763 			goto done;
2764 		}
2765 	} else {
2766 		SHARED_REGION_TRACE_ERROR(
2767 			("shared_region: %p [%d(%s)] map(): "
2768 			"too many mappings (%d) max %d\n",
2769 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2770 			proc_getpid(p), p->p_comm,
2771 			mappings_count, SFM_MAX));
2772 		kr = KERN_FAILURE;
2773 		goto done;
2774 	}
2775 
2776 	/*
2777 	 * Read in the mappings and translate to new format.
2778 	 */
2779 	for (m = 0; m < mappings_count; ++m) {
2780 		user_addr_t from_uaddr = uap->mappings + (m * sizeof(struct shared_file_mapping_np));
2781 		kr = shared_region_copyin(p, from_uaddr, 1, sizeof(legacy_mapping), &legacy_mapping);
2782 		if (kr != 0) {
2783 			goto done;
2784 		}
2785 		mappings[m].sms_address = legacy_mapping.sfm_address;
2786 		mappings[m].sms_size = legacy_mapping.sfm_size;
2787 		mappings[m].sms_file_offset = legacy_mapping.sfm_file_offset;
2788 		mappings[m].sms_max_prot = legacy_mapping.sfm_max_prot;
2789 		mappings[m].sms_init_prot = legacy_mapping.sfm_init_prot;
2790 		mappings[m].sms_slide_size = uap->slide_size;
2791 		mappings[m].sms_slide_start = uap->slide_start;
2792 	}
2793 
2794 	bzero(shared_files, sizeof(shared_files));
2795 	shared_files[0].sf_fd = uap->fd;
2796 	shared_files[0].sf_mappings_count = mappings_count;
2797 	shared_files[0].sf_slide = slide;
2798 
2799 	kr = _shared_region_map_and_slide(p,
2800 	    1,                 /* # of files to map */
2801 	    &shared_files[0],  /* files to map */
2802 	    mappings_count,
2803 	    mappings);
2804 
2805 done:
2806 	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
2807 	return kr;
2808 }
2809 
2810 /*
2811  * This is the new interface for setting up shared region mappings.
2812  *
2813  * The slide used for shared regions setup using this interface is done differently
2814  * from the old interface. The slide value passed in the shared_files_np represents
2815  * a max value. The kernel will choose a random value based on that, then use it
2816  * for all shared regions.
2817  */
2818 #if defined (__x86_64__)
2819 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2820 #else
2821 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2822 #endif
2823 
2824 int
shared_region_map_and_slide_2_np(struct proc * p,struct shared_region_map_and_slide_2_np_args * uap,__unused int * retvalp)2825 shared_region_map_and_slide_2_np(
2826 	struct proc                                  *p,
2827 	struct shared_region_map_and_slide_2_np_args *uap,
2828 	__unused int                                 *retvalp)
2829 {
2830 	unsigned int                  files_count;
2831 	struct shared_file_np         *shared_files = NULL;
2832 	unsigned int                  mappings_count;
2833 	struct shared_file_mapping_slide_np *mappings = NULL;
2834 	kern_return_t                 kr = KERN_SUCCESS;
2835 
2836 	files_count = uap->files_count;
2837 	mappings_count = uap->mappings_count;
2838 
2839 	if (files_count == 0) {
2840 		SHARED_REGION_TRACE_INFO(
2841 			("shared_region: %p [%d(%s)] map(): "
2842 			"no files\n",
2843 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2844 			proc_getpid(p), p->p_comm));
2845 		kr = 0; /* no files to map: we're done ! */
2846 		goto done;
2847 	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
2848 		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
2849 		if (shared_files == NULL) {
2850 			kr = KERN_RESOURCE_SHORTAGE;
2851 			goto done;
2852 		}
2853 	} else {
2854 		SHARED_REGION_TRACE_ERROR(
2855 			("shared_region: %p [%d(%s)] map(): "
2856 			"too many files (%d) max %d\n",
2857 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2858 			proc_getpid(p), p->p_comm,
2859 			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
2860 		kr = KERN_FAILURE;
2861 		goto done;
2862 	}
2863 
2864 	if (mappings_count == 0) {
2865 		SHARED_REGION_TRACE_INFO(
2866 			("shared_region: %p [%d(%s)] map(): "
2867 			"no mappings\n",
2868 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2869 			proc_getpid(p), p->p_comm));
2870 		kr = 0; /* no mappings: we're done ! */
2871 		goto done;
2872 	} else if (mappings_count <= SFM_MAX) {
2873 		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
2874 		if (mappings == NULL) {
2875 			kr = KERN_RESOURCE_SHORTAGE;
2876 			goto done;
2877 		}
2878 	} else {
2879 		SHARED_REGION_TRACE_ERROR(
2880 			("shared_region: %p [%d(%s)] map(): "
2881 			"too many mappings (%d) max %d\n",
2882 			(void *)VM_KERNEL_ADDRPERM(current_thread()),
2883 			proc_getpid(p), p->p_comm,
2884 			mappings_count, SFM_MAX));
2885 		kr = KERN_FAILURE;
2886 		goto done;
2887 	}
2888 
2889 	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
2890 	if (kr != KERN_SUCCESS) {
2891 		goto done;
2892 	}
2893 
2894 	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
2895 	if (kr != KERN_SUCCESS) {
2896 		goto done;
2897 	}
2898 
2899 	uint32_t max_slide = shared_files[0].sf_slide;
2900 	uint32_t random_val;
2901 	uint32_t slide_amount;
2902 
2903 	if (max_slide != 0) {
2904 		read_random(&random_val, sizeof random_val);
2905 		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
2906 	} else {
2907 		slide_amount = 0;
2908 	}
2909 #if DEVELOPMENT || DEBUG
2910 	extern bool bootarg_disable_aslr;
2911 	if (bootarg_disable_aslr) {
2912 		slide_amount = 0;
2913 	}
2914 #endif /* DEVELOPMENT || DEBUG */
2915 
2916 	/*
2917 	 * Fix up the mappings to reflect the desired slide.
2918 	 */
2919 	unsigned int f;
2920 	unsigned int m = 0;
2921 	unsigned int i;
2922 	for (f = 0; f < files_count; ++f) {
2923 		shared_files[f].sf_slide = slide_amount;
2924 		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
2925 			if (m >= mappings_count) {
2926 				SHARED_REGION_TRACE_ERROR(
2927 					("shared_region: %p [%d(%s)] map(): "
2928 					"mapping count argument was too small\n",
2929 					(void *)VM_KERNEL_ADDRPERM(current_thread()),
2930 					proc_getpid(p), p->p_comm));
2931 				kr = KERN_FAILURE;
2932 				goto done;
2933 			}
2934 			mappings[m].sms_address += slide_amount;
2935 			if (mappings[m].sms_slide_size != 0) {
2936 				mappings[m].sms_slide_start += slide_amount;
2937 			}
2938 		}
2939 	}
2940 
2941 	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
2942 done:
2943 	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
2944 	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
2945 	return kr;
2946 }
2947 
2948 /* sysctl overflow room */
2949 
/* vm.pagesize: read-only; page_size is exported through an (int *) cast. */
2950 SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
2951     (int *) &page_size, 0, "vm page size");
2952 
2953 /* vm_page_free_target is provided as a makeshift solution for applications that want to
2954  *       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
2955  *       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
2956 extern unsigned int     vm_page_free_target;
2957 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
2958     &vm_page_free_target, 0, "Pageout daemon free target");
2959 
/* vm.memory_pressure: read-only view of vm_pageout_state.vm_memory_pressure. */
2960 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
2961     &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
2962 
2963 static int
2964 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
2965 {
2966 #pragma unused(oidp, arg1, arg2)
2967 	unsigned int page_free_wanted;
2968 
2969 	page_free_wanted = mach_vm_ctl_page_free_wanted();
2970 	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
2971 }
2972 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
2973     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
2974     0, 0, vm_ctl_page_free_wanted, "I", "");
2975 
/* Read-only page counters exported under the vm sysctl node. */
2976 extern unsigned int     vm_page_purgeable_count;
2977 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2978     &vm_page_purgeable_count, 0, "Purgeable page count");
2979 
2980 extern unsigned int     vm_page_purgeable_wired_count;
2981 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2982     &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
2983 
2984 extern unsigned int vm_page_kern_lpage_count;
2985 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2986     &vm_page_kern_lpage_count, 0, "kernel used large pages");
2987 
2988 #if DEVELOPMENT || DEBUG
/*
 * debug.vm_mixed_pagesize_supported is a compile-time constant: 1 when the
 * kernel is built with __ARM_MIXED_PAGE_SIZE__, 0 otherwise.  It is readable
 * by any user (CTLFLAG_ANYBODY).
 */
2989 #if __ARM_MIXED_PAGE_SIZE__
2990 static int vm_mixed_pagesize_supported = 1;
2991 #else
2992 static int vm_mixed_pagesize_supported = 0;
2993 #endif /*__ARM_MIXED_PAGE_SIZE__ */
2994 SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
2995     &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
2996 
/* Page grab / free totals and pageout "cleaned" statistics (read-only). */
2997 SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
2998 SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
2999 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
3000     &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
3001 
3002 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
3003     &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
3004 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
3005     &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
3006 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
3007     &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
3008 
3009 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3010     &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
3011 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3012     &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
3013 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3014     &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");         /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) */
3015 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3016     &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
3017 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3018     &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
3019 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3020     &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, "");         /* sum of next two */
3021 #endif /* DEVELOPMENT || DEBUG */
3022 
/* vm.madvise_free_debug is writable (CTLFLAG_RW); see its description string. */
3023 extern int madvise_free_debug;
3024 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
3025     &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
3026 
/* Read-only exports of the fields of vm_page_stats_reusable. */
3027 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3028     &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
3029 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3030     &vm_page_stats_reusable.reusable_pages_success, "");
3031 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3032     &vm_page_stats_reusable.reusable_pages_failure, "");
3033 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3034     &vm_page_stats_reusable.reusable_pages_shared, "");
3035 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3036     &vm_page_stats_reusable.all_reusable_calls, "");
3037 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3038     &vm_page_stats_reusable.partial_reusable_calls, "");
3039 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3040     &vm_page_stats_reusable.reuse_pages_success, "");
3041 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3042     &vm_page_stats_reusable.reuse_pages_failure, "");
3043 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3044     &vm_page_stats_reusable.all_reuse_calls, "");
3045 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3046     &vm_page_stats_reusable.partial_reuse_calls, "");
3047 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3048     &vm_page_stats_reusable.can_reuse_success, "");
3049 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3050     &vm_page_stats_reusable.can_reuse_failure, "");
3051 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
3052     &vm_page_stats_reusable.reusable_reclaimed, "");
3053 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
3054     &vm_page_stats_reusable.reusable_nonwritable, "");
3055 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3056     &vm_page_stats_reusable.reusable_shared, "");
3057 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3058     &vm_page_stats_reusable.free_shared, "");
3059 
3060 
/* Read-only page queue counters. */
3061 extern unsigned int vm_page_free_count, vm_page_speculative_count;
3062 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
3063 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
3064 
3065 extern unsigned int vm_page_cleaned_count;
3066 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
3067 
3068 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
3069 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
3070 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
3071 
3072 /* pageout counts */
3073 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
3074 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
3075 
/*
 * NOTE(review): pageout_speculative_clean and pageout_freed_speculative both
 * export vm_pageout_vminfo.vm_pageout_freed_speculative -- presumably an
 * alias kept for compatibility; confirm before changing either one.
 */
3076 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
3077 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
3078 SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3079 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
3080 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3081 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
3082 
3083 
3084 /* counts of pages prefaulted when entering a memory object */
/* Unlike the counters above, these two are registered CTLFLAG_RW. */
3085 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
3086 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
3087 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3088 
/*
 * x86_64-only clump sysctls.  The promote threshold is writable; the
 * per-size statistics below are read-only and DEVELOPMENT/DEBUG only.
 */
3089 #if defined (__x86_64__)
3090 extern unsigned int vm_clump_promote_threshold;
3091 SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
3092 #if DEVELOPMENT || DEBUG
/* vm_clump_stats[N] is exported as vm_clump_statsN for N = 1..16; index 0 is not exported. */
3093 extern unsigned long vm_clump_stats[];
3094 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
3095 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
3096 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
3097 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
3098 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
3099 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
3100 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
3101 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
3102 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
3103 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
3104 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
3105 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
3106 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
3107 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
3108 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
3109 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
/* Note: the sysctl is named vm_clump_alloc (singular) but exports vm_clump_allocs. */
3110 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
3111 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
3112 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
3113 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
3114 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
3115 #endif  /* if DEVELOPMENT || DEBUG */
3116 #endif  /* #if defined (__x86_64__) */
3117 
/*
 * Read-only secluded-memory counters, compiled in only when
 * CONFIG_SECLUDED_MEMORY is set.
 */
3118 #if CONFIG_SECLUDED_MEMORY
3119 
3120 SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
3121 extern unsigned int vm_page_secluded_target;
3122 extern unsigned int vm_page_secluded_count;
3123 extern unsigned int vm_page_secluded_count_free;
3124 extern unsigned int vm_page_secluded_count_inuse;
3125 extern unsigned int vm_page_secluded_count_over_target;
3126 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
3127 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
3128 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
3129 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
3130 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
3131 
/* Per-field exports of the vm_page_secluded statistics structure. */
3132 extern struct vm_page_secluded_data vm_page_secluded;
3133 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
3134 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
3135 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
3136 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
3137 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
3138 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
3139 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
3140 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
3141 
3142 #endif /* CONFIG_SECLUDED_MEMORY */
3143 
3144 #include <kern/thread.h>
3145 #include <sys/user.h>
3146 
3147 void vm_pageout_io_throttle(void);
3148 
3149 void
vm_pageout_io_throttle(void)3150 vm_pageout_io_throttle(void)
3151 {
3152 	struct uthread *uthread = current_uthread();
3153 
3154 	/*
3155 	 * thread is marked as a low priority I/O type
3156 	 * and the I/O we issued while in this cleaning operation
3157 	 * collided with normal I/O operations... we'll
3158 	 * delay in order to mitigate the impact of this
3159 	 * task on the normal operation of the system
3160 	 */
3161 
3162 	if (uthread->uu_lowpri_window) {
3163 		throttle_lowpri_io(1);
3164 	}
3165 }
3166 
3167 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3168 vm_pressure_monitor(
3169 	__unused struct proc *p,
3170 	struct vm_pressure_monitor_args *uap,
3171 	int *retval)
3172 {
3173 	kern_return_t   kr;
3174 	uint32_t        pages_reclaimed;
3175 	uint32_t        pages_wanted;
3176 
3177 	kr = mach_vm_pressure_monitor(
3178 		(boolean_t) uap->wait_for_pressure,
3179 		uap->nsecs_monitored,
3180 		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3181 		&pages_wanted);
3182 
3183 	switch (kr) {
3184 	case KERN_SUCCESS:
3185 		break;
3186 	case KERN_ABORTED:
3187 		return EINTR;
3188 	default:
3189 		return EINVAL;
3190 	}
3191 
3192 	if (uap->pages_reclaimed) {
3193 		if (copyout((void *)&pages_reclaimed,
3194 		    uap->pages_reclaimed,
3195 		    sizeof(pages_reclaimed)) != 0) {
3196 			return EFAULT;
3197 		}
3198 	}
3199 
3200 	*retval = (int) pages_wanted;
3201 	return 0;
3202 }
3203 
3204 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3205 kas_info(struct proc *p,
3206     struct kas_info_args *uap,
3207     int *retval __unused)
3208 {
3209 #ifndef CONFIG_KAS_INFO
3210 	(void)p;
3211 	(void)uap;
3212 	return ENOTSUP;
3213 #else /* CONFIG_KAS_INFO */
3214 	int                     selector = uap->selector;
3215 	user_addr_t     valuep = uap->value;
3216 	user_addr_t     sizep = uap->size;
3217 	user_size_t size, rsize;
3218 	int                     error;
3219 
3220 	if (!kauth_cred_issuser(kauth_cred_get())) {
3221 		return EPERM;
3222 	}
3223 
3224 #if CONFIG_MACF
3225 	error = mac_system_check_kas_info(kauth_cred_get(), selector);
3226 	if (error) {
3227 		return error;
3228 	}
3229 #endif
3230 
3231 	if (IS_64BIT_PROCESS(p)) {
3232 		user64_size_t size64;
3233 		error = copyin(sizep, &size64, sizeof(size64));
3234 		size = (user_size_t)size64;
3235 	} else {
3236 		user32_size_t size32;
3237 		error = copyin(sizep, &size32, sizeof(size32));
3238 		size = (user_size_t)size32;
3239 	}
3240 	if (error) {
3241 		return error;
3242 	}
3243 
3244 	switch (selector) {
3245 	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3246 	{
3247 		uint64_t slide = vm_kernel_slide;
3248 
3249 		if (sizeof(slide) != size) {
3250 			return EINVAL;
3251 		}
3252 
3253 		error = copyout(&slide, valuep, sizeof(slide));
3254 		if (error) {
3255 			return error;
3256 		}
3257 		rsize = size;
3258 	}
3259 	break;
3260 	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3261 	{
3262 		uint32_t i;
3263 		kernel_mach_header_t *mh = &_mh_execute_header;
3264 		struct load_command *cmd;
3265 		cmd = (struct load_command*) &mh[1];
3266 		uint64_t *bases;
3267 		rsize = mh->ncmds * sizeof(uint64_t);
3268 
3269 		/*
3270 		 * Return the size if no data was passed
3271 		 */
3272 		if (valuep == 0) {
3273 			break;
3274 		}
3275 
3276 		if (rsize > size) {
3277 			return EINVAL;
3278 		}
3279 
3280 		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3281 
3282 		for (i = 0; i < mh->ncmds; i++) {
3283 			if (cmd->cmd == LC_SEGMENT_KERNEL) {
3284 				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3285 				bases[i] = (uint64_t)sg->vmaddr;
3286 			}
3287 			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3288 		}
3289 
3290 		error = copyout(bases, valuep, rsize);
3291 
3292 		kfree_data(bases, rsize);
3293 
3294 		if (error) {
3295 			return error;
3296 		}
3297 	}
3298 	break;
3299 	default:
3300 		return EINVAL;
3301 	}
3302 
3303 	if (IS_64BIT_PROCESS(p)) {
3304 		user64_size_t size64 = (user64_size_t)rsize;
3305 		error = copyout(&size64, sizep, sizeof(size64));
3306 	} else {
3307 		user32_size_t size32 = (user32_size_t)rsize;
3308 		error = copyout(&size32, sizep, sizeof(size32));
3309 	}
3310 
3311 	return error;
3312 #endif /* CONFIG_KAS_INFO */
3313 }
3314 
3315 #if __has_feature(ptrauth_calls)
3316 /*
3317  * Generate a random pointer signing key that isn't 0.
3318  */
3319 uint64_t
generate_jop_key(void)3320 generate_jop_key(void)
3321 {
3322 	uint64_t key;
3323 
3324 	do {
3325 		read_random(&key, sizeof key);
3326 	} while (key == 0);
3327 	return key;
3328 }
3329 #endif /* __has_feature(ptrauth_calls) */
3330 
3331 
3332 #pragma clang diagnostic push
3333 #pragma clang diagnostic ignored "-Wcast-qual"
3334 #pragma clang diagnostic ignored "-Wunused-function"
3335 
3336 static void
asserts()3337 asserts()
3338 {
3339 	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3340 	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3341 }
3342 
3343 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
3344 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
3345 #pragma clang diagnostic pop
3346 
/* Read-only uint32_t page counters defined elsewhere in the VM layer. */
3347 extern uint32_t vm_page_pages;
3348 SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
3349 
3350 extern uint32_t vm_page_busy_absent_skipped;
3351 SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
3352 
/* Note the name swap: vm_page_upl_tainted is exported as upl_pages_tainted. */
3353 extern uint32_t vm_page_upl_tainted;
3354 SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
3355 
3356 extern uint32_t vm_page_iopl_tainted;
3357 SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3358 
#if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);

/*
 * Write handler for vm.footprint_suspend.
 *
 * Reads an int from the request and passes it, together with the current
 * map, to pmap_footprint_suspend().  A non-zero value is silently ignored
 * while vm_footprint_suspend_allowed is 0; a zero value always goes through.
 * A request carrying no new value is a no-op.
 *
 * Returns 0, or the error reported by SYSCTL_IN().
 */
static int
sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int requested;
	int err;

	/* nothing to do unless the caller supplied a new value */
	if (req->newptr == USER_ADDR_NULL) {
		return 0;
	}

	err = SYSCTL_IN(req, &requested, sizeof(int));
	if (err != 0) {
		return err;
	}

	/* suspends are not allowed, but resumes may still proceed */
	if (requested != 0 && !vm_footprint_suspend_allowed) {
		return 0;
	}

	DTRACE_VM2(footprint_suspend,
	    vm_map_t, current_map(),
	    int, requested);

	pmap_footprint_suspend(current_map(), requested);

	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
3397 
/* Read-only 64-bit corpse footprint statistics. */
3398 extern uint64_t vm_map_corpse_footprint_count;
3399 extern uint64_t vm_map_corpse_footprint_size_avg;
3400 extern uint64_t vm_map_corpse_footprint_size_max;
3401 extern uint64_t vm_map_corpse_footprint_full;
3402 extern uint64_t vm_map_corpse_footprint_no_buf;
3403 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
3404     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
3405 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
3406     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
3407 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
3408     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
3409 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
3410     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
3411 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
3412     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3413 
3414 
/* Read-only shared region pager counters. */
3415 extern uint64_t shared_region_pager_copied;
3416 extern uint64_t shared_region_pager_slid;
3417 extern uint64_t shared_region_pager_slid_error;
3418 extern uint64_t shared_region_pager_reclaimed;
3419 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
3420     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
3421 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
3422     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
3423 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
3424     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
3425 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
3426     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
/* vm.shared_region_destroy_delay is writable (CTLFLAG_RW). */
3427 extern int shared_region_destroy_delay;
3428 SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
3429     CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3430 
/* Writable tunable, present only in MACH_ASSERT builds. */
3431 #if MACH_ASSERT
3432 extern int pmap_ledgers_panic_leeway;
3433 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
3434 #endif /* MACH_ASSERT */
3435 
3436 
/*
 * Read-only vm_map_lookup_locked copy statistics, in three families
 * (slowly, strategically, shadow).  Each variable is exported under its own
 * name with the leading "vm_" dropped.  The shadow family has no
 * restart/error counters.
 */
3437 extern uint64_t vm_map_lookup_locked_copy_slowly_count;
3438 extern uint64_t vm_map_lookup_locked_copy_slowly_size;
3439 extern uint64_t vm_map_lookup_locked_copy_slowly_max;
3440 extern uint64_t vm_map_lookup_locked_copy_slowly_restart;
3441 extern uint64_t vm_map_lookup_locked_copy_slowly_error;
3442 extern uint64_t vm_map_lookup_locked_copy_strategically_count;
3443 extern uint64_t vm_map_lookup_locked_copy_strategically_size;
3444 extern uint64_t vm_map_lookup_locked_copy_strategically_max;
3445 extern uint64_t vm_map_lookup_locked_copy_strategically_restart;
3446 extern uint64_t vm_map_lookup_locked_copy_strategically_error;
3447 extern uint64_t vm_map_lookup_locked_copy_shadow_count;
3448 extern uint64_t vm_map_lookup_locked_copy_shadow_size;
3449 extern uint64_t vm_map_lookup_locked_copy_shadow_max;
3450 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
3451     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_count, "");
3452 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
3453     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_size, "");
3454 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
3455     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_max, "");
3456 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
3457     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_restart, "");
3458 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
3459     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_error, "");
3460 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
3461     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_count, "");
3462 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
3463     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_size, "");
3464 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
3465     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_max, "");
3466 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
3467     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_restart, "");
3468 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
3469     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_error, "");
3470 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
3471     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_count, "");
3472 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
3473     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_size, "");
3474 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
3475     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_max, "");
3476 
/* vm.protect_privileged_from_untrusted is writable; vm.copied_on_read is a read-only counter. */
3477 extern int vm_protect_privileged_from_untrusted;
3478 SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
3479     CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
3480 extern uint64_t vm_copied_on_read;
3481 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
3482     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
3483 
/* Read-only shared region count and peak. */
3484 extern int vm_shared_region_count;
3485 extern int vm_shared_region_peak;
3486 SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
3487     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
3488 SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
3489     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
3490 #if DEVELOPMENT || DEBUG
/* NOTE(review): the two *_resident_* variables are unsigned int but are registered with SYSCTL_INT. */
3491 extern unsigned int shared_region_pagers_resident_count;
3492 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
3493     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
3494 extern unsigned int shared_region_pagers_resident_peak;
3495 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
3496     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
3497 extern int shared_region_pager_count;
3498 SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
3499     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
/* Key and reslide counters exist only when pointer authentication is compiled in. */
3500 #if __has_feature(ptrauth_calls)
3501 extern int shared_region_key_count;
3502 SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
3503     CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
3504 extern int vm_shared_region_reslide_count;
3505 SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
3506     CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
3507 #endif /* __has_feature(ptrauth_calls) */
3508 #endif /* DEVELOPMENT || DEBUG */
3509 
3510 #if MACH_ASSERT
3511 extern int debug4k_filter;
3512 SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
3513 extern int debug4k_panic_on_terminate;
3514 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
3515 extern int debug4k_panic_on_exception;
3516 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
3517 extern int debug4k_panic_on_misaligned_sharing;
3518 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
3519 #endif /* MACH_ASSERT */
3520 
3521 extern uint64_t vm_map_set_size_limit_count;
3522 extern uint64_t vm_map_set_data_limit_count;
3523 extern uint64_t vm_map_enter_RLIMIT_AS_count;
3524 extern uint64_t vm_map_enter_RLIMIT_DATA_count;
3525 SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
3526 SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
3527 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
3528 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
3529 
3530 extern uint64_t vm_fault_resilient_media_initiate;
3531 extern uint64_t vm_fault_resilient_media_retry;
3532 extern uint64_t vm_fault_resilient_media_proceed;
3533 extern uint64_t vm_fault_resilient_media_release;
3534 extern uint64_t vm_fault_resilient_media_abort1;
3535 extern uint64_t vm_fault_resilient_media_abort2;
3536 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
3537 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
3538 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
3539 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
3540 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
3541 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/*
 * Integer sysctls under the "vm" node for the
 * vm_fault_resilient_media_inject_error{1,2,3} variables (defined
 * elsewhere), compiled only when MACH_ASSERT is set.  For each N the
 * "_rate" variable is exported read/write while the matching
 * inject_errorN variable is exported read-only.
 */
extern int vm_fault_resilient_media_inject_error1_rate;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &vm_fault_resilient_media_inject_error1_rate, 0, "");
extern int vm_fault_resilient_media_inject_error1;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_fault_resilient_media_inject_error1, 0, "");

extern int vm_fault_resilient_media_inject_error2_rate;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &vm_fault_resilient_media_inject_error2_rate, 0, "");
extern int vm_fault_resilient_media_inject_error2;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_fault_resilient_media_inject_error2, 0, "");

extern int vm_fault_resilient_media_inject_error3_rate;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &vm_fault_resilient_media_inject_error3_rate, 0, "");
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */
3556 
3557 /*
3558  * A sysctl which causes all existing shared regions to become stale. They
3559  * will no longer be used by anything new and will be torn down as soon as
3560  * the last existing user exits. A write of non-zero value causes that to happen.
3561  * This should only be used by launchd, so we check that this is initproc.
3562  */
3563 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3564 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3565 {
3566 	unsigned int value = 0;
3567 	int changed = 0;
3568 	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3569 	if (error || !changed) {
3570 		return error;
3571 	}
3572 	if (current_proc() != initproc) {
3573 		return EPERM;
3574 	}
3575 
3576 	vm_shared_region_pivot();
3577 
3578 	return 0;
3579 }
3580 
3581 SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
3582     CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
3583     0, 0, shared_region_pivot, "I", "");
3584 
3585 SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
3586     &vmtc_total, 0, "total text page corruptions detected");
3587 
3588 /*
3589  * sysctl to return the number of pages on retired_pages_object
3590  */
3591 static int
3592 retired_pages_count SYSCTL_HANDLER_ARGS
3593 {
3594 #pragma unused(arg1, arg2, oidp)
3595 	extern uint32_t vm_retired_pages_count(void);
3596 	uint32_t value = vm_retired_pages_count();
3597 
3598 	return SYSCTL_OUT(req, &value, sizeof(value));
3599 }
3600 SYSCTL_PROC(_vm, OID_AUTO, retired_pages_count, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3601     0, 0, &retired_pages_count, "I", "");
3602 
#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal
 * instruction.  Used for testing text page self healing.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);

/*
 * Handler for the write-only, masked "vm.corrupt_text_addr" sysctl
 * (DEBUG / DEVELOPMENT kernels only).
 *
 * The 64-bit value supplied by the caller is cast to an address and passed
 * to vm_corrupt_text_addr().
 *
 * oidp is forwarded to sysctl_handle_quad(), so unlike arg1 and arg2 it is
 * not marked __unused.
 *
 * Returns the error from sysctl_handle_quad() if it fails, 0 if the request
 * carried no new value or vm_corrupt_text_addr() returned KERN_SUCCESS, and
 * EINVAL for any other result from vm_corrupt_text_addr().
 */
static int
corrupt_text_addr(struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	uint64_t value = 0;
	int error;

	error = sysctl_handle_quad(oidp, &value, 0, req);
	if (error || !req->newptr) {
		/* Either the transfer failed or nothing was written. */
		return error;
	}

	return (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) ? 0 : EINVAL;
}

SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
3629