1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #if NECP
86 #include <net/necp.h>
87 #endif /* NECP */
88 #if SKYWALK
89 #include <skywalk/os_channel.h>
90 #endif /* SKYWALK */
91
92 #include <security/audit/audit.h>
93 #include <security/mac.h>
94 #include <bsm/audit_kevents.h>
95
96 #include <kern/kalloc.h>
97 #include <vm/vm_map.h>
98 #include <vm/vm_kern.h>
99 #include <vm/vm_pageout.h>
100
101 #include <mach/shared_region.h>
102 #include <vm/vm_shared_region.h>
103
104 #include <vm/vm_dyld_pager.h>
105
106 #include <vm/vm_protos.h>
107
108 #include <sys/kern_memorystatus.h>
109 #include <sys/kern_memorystatus_freeze.h>
110 #include <sys/proc_internal.h>
111
112 #include <mach-o/fixup-chains.h>
113
114 #if CONFIG_MACF
115 #include <security/mac_framework.h>
116 #endif
117
118 #include <kern/bits.h>
119
120 #if CONFIG_CSR
121 #include <sys/csr.h>
122 #endif /* CONFIG_CSR */
123 #include <sys/trust_caches.h>
124 #include <libkern/amfi/amfi.h>
125 #include <IOKit/IOBSD.h>
126
#if VM_MAP_DEBUG_APPLE_PROTECT
/* Expose the vm_map_debug_apple_protect global as vm.map_debug_apple_protect. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
/* Expose the vm_map_debug_fourk global as vm.map_debug_fourk. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
134
135 #if DEVELOPMENT || DEBUG
136
137 static int
138 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
139 {
140 #pragma unused(arg1, arg2)
141 vm_offset_t kaddr;
142 kern_return_t kr;
143 int error = 0;
144 int size = 0;
145
146 error = sysctl_handle_int(oidp, &size, 0, req);
147 if (error || !req->newptr) {
148 return error;
149 }
150
151 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
152 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
153
154 if (kr == KERN_SUCCESS) {
155 kmem_free(kernel_map, kaddr, size);
156 }
157
158 return error;
159 }
160
161 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
162 0, 0, &sysctl_kmem_alloc_contig, "I", "");
163
/* Expose the vm_region_footprint global (defined in osfmk) as vm.region_footprint. */
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
166
167 #endif /* DEVELOPMENT || DEBUG */
168
169 static int
170 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
171 {
172 #pragma unused(arg1, arg2, oidp)
173 int error = 0;
174 int value;
175
176 value = task_self_region_footprint();
177 error = SYSCTL_OUT(req, &value, sizeof(int));
178 if (error) {
179 return error;
180 }
181
182 if (!req->newptr) {
183 return 0;
184 }
185
186 error = SYSCTL_IN(req, &value, sizeof(int));
187 if (error) {
188 return error;
189 }
190 task_self_region_footprint_set(value);
191 return 0;
192 }
193 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
194
195 static int
196 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
197 {
198 #pragma unused(arg1, arg2, oidp)
199 int error = 0;
200 int value;
201
202 value = (1 << thread_self_region_page_shift());
203 error = SYSCTL_OUT(req, &value, sizeof(int));
204 if (error) {
205 return error;
206 }
207
208 if (!req->newptr) {
209 return 0;
210 }
211
212 error = SYSCTL_IN(req, &value, sizeof(int));
213 if (error) {
214 return error;
215 }
216
217 if (value != 0 && value != 4096 && value != 16384) {
218 return EINVAL;
219 }
220
221 #if !__ARM_MIXED_PAGE_SIZE__
222 if (value != vm_map_page_size(current_map())) {
223 return EINVAL;
224 }
225 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
226
227 thread_self_region_page_shift_set(bit_first(value));
228 return 0;
229 }
230 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
231
232
#if DEVELOPMENT || DEBUG
/* When set, panic instead of continuing when unsigned code would execute. */
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
#endif /* DEVELOPMENT || DEBUG */

/* Read-only counters for executable-page UPL creation / wiring events. */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

/* apple-protect pager instance counters and (writable) cache limit. */
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");

#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

/* Enables extra printf()s in Mach-O load paths. */
extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
/* Counts of free page-table pages kept by the pmap layer. */
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#if DEVELOPMENT || DEBUG
/* ASID (address-space ID) flush/hit/miss counters from the arm64 pmap. */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */

/* Read-only views of the vm_counters object-collapse / COW statistics. */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;   /* Disabled by default */
extern int proc_shadow_max(void);

/*
 * vm.vm_shadow_max: reports the deepest VM object shadow chain found by
 * proc_shadow_max(), or 0 when scanning is disabled via
 * vm.vm_shadow_max_enabled.
 */
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int depth;

	depth = vm_shadow_max_enabled ? proc_shadow_max() : 0;

	return SYSCTL_OUT(req, &depth, sizeof(depth));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
332
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/* Forward declaration; defined below.  Non-inline so it is visible in stackshots. */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */
348
/*
 * Human-readable names for VM protections, indexed by (prot & VM_PROT_ALL):
 * bit 0 = read, bit 1 = write, bit 2 = execute.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
359
360 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)361 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
362 {
363 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
364 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
365 }
366
/*
 * shared_region_unnest_logging: level of logging of unnesting events
 * 0 - no logging
 * 1 - throttled logging of unexpected unnesting events (default)
 * 2 - unthrottled logging of unexpected unnesting events
 * 3+ - unthrottled logging of all unnesting events
 */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

/* Throttle for level 1: at most (threshold) messages per (interval) seconds. */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
381
382
#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

/* Directories the dyld shared cache may legitimately be mapped from (macOS). */
static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

/* Embedded platforms: enforcement off, different cache locations. */
static int scdir_enforce = 0;
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Same idea, for the DriverKit shared cache. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
420
421 #ifndef SECURE_KERNEL
422 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
423 {
424 #if CONFIG_CSR
425 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
426 printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
427 return EPERM;
428 }
429 #endif /* CONFIG_CSR */
430 return sysctl_handle_int(oidp, arg1, arg2, req);
431 }
432
433 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
434 #endif
435
/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

/*
 * Report (via DTrace probe and printf) that range [s, e) of the dyld
 * shared region was unnested in map "m".  Behavior depends on
 * shared_region_unnest_logging (see the comment at its definition):
 * level 0 logs nothing, levels <= 2 skip expected unnests of writable
 * nested entries, and level <= 1 is rate-throttled.
 */
void
log_unnest_badness(
	vm_map_t        m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t       is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval  tv;

	if (shared_region_unnest_logging == 0) {
		/* logging disabled */
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	if (shared_region_unnest_logging <= 1) {
		/* level 1: allow a burst of messages, then mute until the interval expires */
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			/* new interval: restart the message budget */
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
486
487 int
useracc(user_addr_t addr,user_size_t len,int prot)488 useracc(
489 user_addr_t addr,
490 user_size_t len,
491 int prot)
492 {
493 vm_map_t map;
494
495 map = current_map();
496 return vm_map_check_protection(
497 map,
498 vm_map_trunc_page(addr,
499 vm_map_page_mask(map)),
500 vm_map_round_page(addr + len,
501 vm_map_page_mask(map)),
502 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
503 }
504
505 int
vslock(user_addr_t addr,user_size_t len)506 vslock(
507 user_addr_t addr,
508 user_size_t len)
509 {
510 kern_return_t kret;
511 vm_map_t map;
512
513 map = current_map();
514 kret = vm_map_wire_kernel(map,
515 vm_map_trunc_page(addr,
516 vm_map_page_mask(map)),
517 vm_map_round_page(addr + len,
518 vm_map_page_mask(map)),
519 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
520 FALSE);
521
522 switch (kret) {
523 case KERN_SUCCESS:
524 return 0;
525 case KERN_INVALID_ADDRESS:
526 case KERN_NO_SPACE:
527 return ENOMEM;
528 case KERN_PROTECTION_FAILURE:
529 return EACCES;
530 default:
531 return EINVAL;
532 }
533 }
534
/*
 * vsunlock: undo a vslock() by unwiring [addr, addr+len) in the current
 * map.  "dirtied" is currently unused (the dirty-page marking code is
 * compiled out under FIXME).  Returns 0 on success or a BSD errno.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	/* mark each page in the range modified before unwiring */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* translate Mach status to a BSD errno */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
585
586 int
subyte(user_addr_t addr,int byte)587 subyte(
588 user_addr_t addr,
589 int byte)
590 {
591 char character;
592
593 character = (char)byte;
594 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
595 }
596
597 int
suibyte(user_addr_t addr,int byte)598 suibyte(
599 user_addr_t addr,
600 int byte)
601 {
602 char character;
603
604 character = (char)byte;
605 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
606 }
607
608 int
fubyte(user_addr_t addr)609 fubyte(user_addr_t addr)
610 {
611 unsigned char byte;
612
613 if (copyin(addr, (void *) &byte, sizeof(char))) {
614 return -1;
615 }
616 return byte;
617 }
618
619 int
fuibyte(user_addr_t addr)620 fuibyte(user_addr_t addr)
621 {
622 unsigned char byte;
623
624 if (copyin(addr, (void *) &(byte), sizeof(char))) {
625 return -1;
626 }
627 return byte;
628 }
629
630 int
suword(user_addr_t addr,long word)631 suword(
632 user_addr_t addr,
633 long word)
634 {
635 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
636 }
637
638 long
fuword(user_addr_t addr)639 fuword(user_addr_t addr)
640 {
641 long word = 0;
642
643 if (copyin(addr, (void *) &word, sizeof(int))) {
644 return -1;
645 }
646 return word;
647 }
648
649 /* suiword and fuiword are the same as suword and fuword, respectively */
650
651 int
suiword(user_addr_t addr,long word)652 suiword(
653 user_addr_t addr,
654 long word)
655 {
656 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
657 }
658
659 long
fuiword(user_addr_t addr)660 fuiword(user_addr_t addr)
661 {
662 long word = 0;
663
664 if (copyin(addr, (void *) &word, sizeof(int))) {
665 return -1;
666 }
667 return word;
668 }
669
670 /*
671 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
672 * fetching and setting of process-sized size_t and pointer values.
673 */
674 int
sulong(user_addr_t addr,int64_t word)675 sulong(user_addr_t addr, int64_t word)
676 {
677 if (IS_64BIT_PROCESS(current_proc())) {
678 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
679 } else {
680 return suiword(addr, (long)word);
681 }
682 }
683
684 int64_t
fulong(user_addr_t addr)685 fulong(user_addr_t addr)
686 {
687 int64_t longword;
688
689 if (IS_64BIT_PROCESS(current_proc())) {
690 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
691 return -1;
692 }
693 return longword;
694 } else {
695 return (int64_t)fuiword(addr);
696 }
697 }
698
699 int
suulong(user_addr_t addr,uint64_t uword)700 suulong(user_addr_t addr, uint64_t uword)
701 {
702 if (IS_64BIT_PROCESS(current_proc())) {
703 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
704 } else {
705 return suiword(addr, (uint32_t)uword);
706 }
707 }
708
709 uint64_t
fuulong(user_addr_t addr)710 fuulong(user_addr_t addr)
711 {
712 uint64_t ulongword;
713
714 if (IS_64BIT_PROCESS(current_proc())) {
715 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
716 return -1ULL;
717 }
718 return ulongword;
719 } else {
720 return (uint64_t)fuiword(addr);
721 }
722 }
723
/* swapon(2): not supported; always fails with ENOTSUP. */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}
729
730 /*
731 * pid_for_task
732 *
733 * Find the BSD process ID for the Mach task associated with the given Mach port
734 * name
735 *
736 * Parameters: args User argument descriptor (see below)
737 *
738 * Indirect parameters: args->t Mach port name
739 * args->pid Process ID (returned value; see below)
740 *
 * Returns:	KERN_SUCCESS	Success
742 * KERN_FAILURE Not success
743 *
744 * Implicit returns: args->pid Process ID
745 *
746 */
/* Mach trap body; see the contract in the block comment above. */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t        t = args->t;
	user_addr_t             pid_addr = args->pid;
	proc_t p;
	task_t          t1;
	int     pid = -1;
	kern_return_t   err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* resolve the port name; the "task name" flavor is sufficient here */
	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (is_corpsetask(t1)) {
			/* corpses have no proc; use the pid recorded in the task */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* always write a pid back to the caller (-1 on failure) */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
785
/*
 * task_for_pid() policy (consulted in task_for_pid_posix_check()):
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
793
794 /*
795 * Routine: task_for_pid_posix_check
796 * Purpose:
797 * Verify that the current process should be allowed to
798 * get the target process's task port. This is only
799 * permitted if:
800 * - The current process is root
801 * OR all of the following are true:
802 * - The target process's real, effective, and saved uids
803 * are the same as the current proc's euid,
804 * - The target process's group set is a subset of the
805 * calling process's group set, and
806 * - The target process hasn't switched credentials.
807 *
808 * Returns: TRUE: permitted
809 * FALSE: denied
810 */
/* See the permission rules in the block comment above. */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	bool checkcredentials;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred)) {
		return TRUE;
	}

	/* We're allowed to get our own task port */
	if (target == current_proc()) {
		return TRUE;
	}

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* third-party debuggable drivers skip the uid/gid checks below */
	checkcredentials = !proc_is_third_party_debuggable_driver(target);

	if (checkcredentials) {
		/* Do target's ruid, euid, and saved uid match my euid? */
		if ((kauth_cred_getuid(targetcred) != myuid) ||
		    (kauth_cred_getruid(targetcred) != myuid) ||
		    (kauth_cred_getsvuid(targetcred) != myuid)) {
			allowed = FALSE;
			goto out;
		}
		/* Are target's groups a subset of my groups? */
		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
		    allowed == 0) {
			allowed = FALSE;
			goto out;
		}
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	/* drop the credential reference taken above */
	kauth_cred_unref(&targetcred);
	return allowed;
}
876
877 /*
878 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
879 *
880 * Description: Waits for the user space daemon to respond to the request
881 * we made. Function declared non inline to be visible in
882 * stackshots and spindumps as well as debugging.
883 */
/* Thin, deliberately non-inlined wrapper (see comment above) around the
 * synchronous task-access-server check. */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
890
891 /*
892 * Routine: task_for_pid
893 * Purpose:
894 * Get the task port for another "process", named by its
895 * process ID on the same host as "target_task".
896 *
897 * Only permitted to privileged processes, or processes
898 * with the same user ID.
899 *
 * Note: if pid == 0, an error is returned no matter who is calling.
901 *
902 * XXX This should be a BSD system call, not a Mach trap!!!
903 */
/* Mach trap body; see the contract in the block comment above. */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	void                    * sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* convert the caller-supplied port name into a task reference */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* capture identity/"is self" before we drop the proc ref below */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* posix-level permission check (uids, gids, tfp policy) */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held across upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	/* MAC policy check for control-flavor task port access */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (is_corpsetask(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* move the send right into the caller's IPC space */
	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* always report the resulting port name (MACH_PORT_NULL on failure) */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	/* drop whatever references are still held on this path */
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1051
1052 /*
1053 * Routine: task_name_for_pid
1054 * Purpose:
1055 * Get the task name port for another "process", named by its
1056 * process ID on the same host as "target_task".
1057 *
1058 * Only permitted to privileged processes, or processes
1059 * with the same user ID.
1060 *
1061 * XXX This should be a BSD system call, not a Mach trap!!!
1062 */
1063
1064 kern_return_t
task_name_for_pid(struct task_name_for_pid_args * args)1065 task_name_for_pid(
1066 struct task_name_for_pid_args *args)
1067 {
1068 mach_port_name_t target_tport = args->target_tport;
1069 int pid = args->pid;
1070 user_addr_t task_addr = args->t;
1071 proc_t p = PROC_NULL;
1072 task_t t1 = TASK_NULL;
1073 mach_port_name_t tret = MACH_PORT_NULL;
1074 void * sright;
1075 int error = 0, refheld = 0;
1076 kauth_cred_t target_cred;
1077
1078 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
1079 AUDIT_ARG(pid, pid);
1080 AUDIT_ARG(mach_port1, target_tport);
1081
1082 t1 = port_name_to_task(target_tport);
1083 if (t1 == TASK_NULL) {
1084 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1085 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1086 return KERN_FAILURE;
1087 }
1088
1089 p = proc_find(pid);
1090 if (p != PROC_NULL) {
1091 AUDIT_ARG(process, p);
1092 target_cred = kauth_cred_proc_ref(p);
1093 refheld = 1;
1094
1095 if ((p->p_stat != SZOMB)
1096 && ((current_proc() == p)
1097 || kauth_cred_issuser(kauth_cred_get())
1098 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
1099 ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
1100 if (proc_task(p) != TASK_NULL) {
1101 struct proc_ident pident = proc_ident(p);
1102
1103 task_t task = proc_task(p);
1104
1105 task_reference(task);
1106 proc_rele(p);
1107 p = PROC_NULL;
1108 #if CONFIG_MACF
1109 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
1110 if (error) {
1111 task_deallocate(task);
1112 goto noperm;
1113 }
1114 #endif
1115 sright = (void *)convert_task_name_to_port(task);
1116 task = NULL;
1117 tret = ipc_port_copyout_send(sright,
1118 get_task_ipcspace(current_task()));
1119 } else {
1120 tret = MACH_PORT_NULL;
1121 }
1122
1123 AUDIT_ARG(mach_port2, tret);
1124 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1125 task_deallocate(t1);
1126 error = KERN_SUCCESS;
1127 goto tnfpout;
1128 }
1129 }
1130
1131 #if CONFIG_MACF
1132 noperm:
1133 #endif
1134 task_deallocate(t1);
1135 tret = MACH_PORT_NULL;
1136 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1137 error = KERN_FAILURE;
1138 tnfpout:
1139 if (refheld != 0) {
1140 kauth_cred_unref(&target_cred);
1141 }
1142 if (p != PROC_NULL) {
1143 proc_rele(p);
1144 }
1145 AUDIT_MACH_SYSCALL_EXIT(error);
1146 return error;
1147 }
1148
1149 /*
1150 * Routine: task_inspect_for_pid
1151 * Purpose:
1152 * Get the task inspect port for another "process", named by its
1153 * process ID on the same host as "target_task".
1154 */
1155 int
task_inspect_for_pid(struct proc * p __unused,struct task_inspect_for_pid_args * args,int * ret)1156 task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
1157 {
1158 mach_port_name_t target_tport = args->target_tport;
1159 int pid = args->pid;
1160 user_addr_t task_addr = args->t;
1161
1162 proc_t proc = PROC_NULL;
1163 task_t t1 = TASK_NULL;
1164 task_inspect_t task_insp = TASK_INSPECT_NULL;
1165 mach_port_name_t tret = MACH_PORT_NULL;
1166 ipc_port_t tfpport = MACH_PORT_NULL;
1167 int error = 0;
1168 void *sright = NULL;
1169 boolean_t is_current_proc = FALSE;
1170 struct proc_ident pident = {0};
1171
1172 /* Disallow inspect port for kernel_task */
1173 if (pid == 0) {
1174 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1175 return EPERM;
1176 }
1177
1178 t1 = port_name_to_task(target_tport);
1179 if (t1 == TASK_NULL) {
1180 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1181 return EINVAL;
1182 }
1183
1184 proc = proc_find(pid);
1185 if (proc == PROC_NULL) {
1186 error = ESRCH;
1187 goto tifpout;
1188 }
1189 pident = proc_ident(proc);
1190 is_current_proc = (proc == current_proc());
1191
1192 if (!(task_for_pid_posix_check(proc))) {
1193 error = EPERM;
1194 goto tifpout;
1195 }
1196
1197 task_insp = proc_task(proc);
1198 if (task_insp == TASK_INSPECT_NULL) {
1199 goto tifpout;
1200 }
1201
1202 /*
1203 * Grab a task reference and drop the proc reference before making any upcalls.
1204 */
1205 task_reference(task_insp);
1206
1207 proc_rele(proc);
1208 proc = PROC_NULL;
1209
1210 #if CONFIG_MACF
1211 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
1212 if (error) {
1213 error = EPERM;
1214 goto tifpout;
1215 }
1216 #endif
1217
1218 /* If we aren't root and target's task access port is set... */
1219 if (!kauth_cred_issuser(kauth_cred_get()) &&
1220 !is_current_proc &&
1221 (task_get_task_access_port(task_insp, &tfpport) == 0) &&
1222 (tfpport != IPC_PORT_NULL)) {
1223 if (tfpport == IPC_PORT_DEAD) {
1224 error = EACCES;
1225 goto tifpout;
1226 }
1227
1228
1229 /* Call up to the task access server */
1230 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1231 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
1232
1233 if (error != MACH_MSG_SUCCESS) {
1234 if (error == MACH_RCV_INTERRUPTED) {
1235 error = EINTR;
1236 } else {
1237 error = EPERM;
1238 }
1239 goto tifpout;
1240 }
1241 }
1242
1243 /* Check if the task has been corpsified */
1244 if (is_corpsetask(task_insp)) {
1245 error = EACCES;
1246 goto tifpout;
1247 }
1248
1249 /* could be IP_NULL, consumes a ref */
1250 sright = (void*) convert_task_inspect_to_port(task_insp);
1251 task_insp = TASK_INSPECT_NULL;
1252 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1253
1254 tifpout:
1255 task_deallocate(t1);
1256 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1257 if (proc != PROC_NULL) {
1258 proc_rele(proc);
1259 }
1260 if (tfpport != IPC_PORT_NULL) {
1261 ipc_port_release_send(tfpport);
1262 }
1263 if (task_insp != TASK_INSPECT_NULL) {
1264 task_deallocate(task_insp);
1265 }
1266
1267 *ret = error;
1268 return error;
1269 }
1270
1271 /*
1272 * Routine: task_read_for_pid
1273 * Purpose:
1274 * Get the task read port for another "process", named by its
1275 * process ID on the same host as "target_task".
1276 */
1277 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1278 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1279 {
1280 mach_port_name_t target_tport = args->target_tport;
1281 int pid = args->pid;
1282 user_addr_t task_addr = args->t;
1283
1284 proc_t proc = PROC_NULL;
1285 task_t t1 = TASK_NULL;
1286 task_read_t task_read = TASK_READ_NULL;
1287 mach_port_name_t tret = MACH_PORT_NULL;
1288 ipc_port_t tfpport = MACH_PORT_NULL;
1289 int error = 0;
1290 void *sright = NULL;
1291 boolean_t is_current_proc = FALSE;
1292 struct proc_ident pident = {0};
1293
1294 /* Disallow read port for kernel_task */
1295 if (pid == 0) {
1296 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1297 return EPERM;
1298 }
1299
1300 t1 = port_name_to_task(target_tport);
1301 if (t1 == TASK_NULL) {
1302 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1303 return EINVAL;
1304 }
1305
1306 proc = proc_find(pid);
1307 if (proc == PROC_NULL) {
1308 error = ESRCH;
1309 goto trfpout;
1310 }
1311 pident = proc_ident(proc);
1312 is_current_proc = (proc == current_proc());
1313
1314 if (!(task_for_pid_posix_check(proc))) {
1315 error = EPERM;
1316 goto trfpout;
1317 }
1318
1319 task_read = proc_task(proc);
1320 if (task_read == TASK_INSPECT_NULL) {
1321 goto trfpout;
1322 }
1323
1324 /*
1325 * Grab a task reference and drop the proc reference before making any upcalls.
1326 */
1327 task_reference(task_read);
1328
1329 proc_rele(proc);
1330 proc = PROC_NULL;
1331
1332 #if CONFIG_MACF
1333 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1334 if (error) {
1335 error = EPERM;
1336 goto trfpout;
1337 }
1338 #endif
1339
1340 /* If we aren't root and target's task access port is set... */
1341 if (!kauth_cred_issuser(kauth_cred_get()) &&
1342 !is_current_proc &&
1343 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1344 (tfpport != IPC_PORT_NULL)) {
1345 if (tfpport == IPC_PORT_DEAD) {
1346 error = EACCES;
1347 goto trfpout;
1348 }
1349
1350
1351 /* Call up to the task access server */
1352 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1353 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1354
1355 if (error != MACH_MSG_SUCCESS) {
1356 if (error == MACH_RCV_INTERRUPTED) {
1357 error = EINTR;
1358 } else {
1359 error = EPERM;
1360 }
1361 goto trfpout;
1362 }
1363 }
1364
1365 /* Check if the task has been corpsified */
1366 if (is_corpsetask(task_read)) {
1367 error = EACCES;
1368 goto trfpout;
1369 }
1370
1371 /* could be IP_NULL, consumes a ref */
1372 sright = (void*) convert_task_read_to_port(task_read);
1373 task_read = TASK_READ_NULL;
1374 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1375
1376 trfpout:
1377 task_deallocate(t1);
1378 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1379 if (proc != PROC_NULL) {
1380 proc_rele(proc);
1381 }
1382 if (tfpport != IPC_PORT_NULL) {
1383 ipc_port_release_send(tfpport);
1384 }
1385 if (task_read != TASK_READ_NULL) {
1386 task_deallocate(task_read);
1387 }
1388
1389 *ret = error;
1390 return error;
1391 }
1392
1393 kern_return_t
pid_suspend(struct proc * p __unused,struct pid_suspend_args * args,int * ret)1394 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1395 {
1396 task_t target = NULL;
1397 proc_t targetproc = PROC_NULL;
1398 int pid = args->pid;
1399 int error = 0;
1400 mach_port_t tfpport = MACH_PORT_NULL;
1401
1402 if (pid == 0) {
1403 error = EPERM;
1404 goto out;
1405 }
1406
1407 targetproc = proc_find(pid);
1408 if (targetproc == PROC_NULL) {
1409 error = ESRCH;
1410 goto out;
1411 }
1412
1413 if (!task_for_pid_posix_check(targetproc) &&
1414 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1415 error = EPERM;
1416 goto out;
1417 }
1418
1419 #if CONFIG_MACF
1420 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
1421 if (error) {
1422 error = EPERM;
1423 goto out;
1424 }
1425 #endif
1426
1427 target = proc_task(targetproc);
1428 #if XNU_TARGET_OS_OSX
1429 if (target != TASK_NULL) {
1430 /* If we aren't root and target's task access port is set... */
1431 if (!kauth_cred_issuser(kauth_cred_get()) &&
1432 targetproc != current_proc() &&
1433 (task_get_task_access_port(target, &tfpport) == 0) &&
1434 (tfpport != IPC_PORT_NULL)) {
1435 if (tfpport == IPC_PORT_DEAD) {
1436 error = EACCES;
1437 goto out;
1438 }
1439
1440 /* Call up to the task access server */
1441 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1442 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1443
1444 if (error != MACH_MSG_SUCCESS) {
1445 if (error == MACH_RCV_INTERRUPTED) {
1446 error = EINTR;
1447 } else {
1448 error = EPERM;
1449 }
1450 goto out;
1451 }
1452 }
1453 }
1454 #endif /* XNU_TARGET_OS_OSX */
1455
1456 task_reference(target);
1457 error = task_pidsuspend(target);
1458 if (error) {
1459 if (error == KERN_INVALID_ARGUMENT) {
1460 error = EINVAL;
1461 } else {
1462 error = EPERM;
1463 }
1464 }
1465 #if CONFIG_MEMORYSTATUS
1466 else {
1467 memorystatus_on_suspend(targetproc);
1468 }
1469 #endif
1470
1471 task_deallocate(target);
1472
1473 out:
1474 if (tfpport != IPC_PORT_NULL) {
1475 ipc_port_release_send(tfpport);
1476 }
1477
1478 if (targetproc != PROC_NULL) {
1479 proc_rele(targetproc);
1480 }
1481 *ret = error;
1482 return error;
1483 }
1484
1485 kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args * args)1486 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1487 {
1488 mach_port_name_t target_tport = args->target_tport;
1489 int pid = args->pid;
1490 user_addr_t task_addr = args->t;
1491 proc_t p = PROC_NULL;
1492 task_t t1 = TASK_NULL;
1493 task_t task = TASK_NULL;
1494 mach_port_name_t tret = MACH_PORT_NULL;
1495 ipc_port_t tfpport = MACH_PORT_NULL;
1496 ipc_port_t sright = NULL;
1497 int error = 0;
1498 boolean_t is_current_proc = FALSE;
1499 struct proc_ident pident = {0};
1500
1501 AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1502 AUDIT_ARG(pid, pid);
1503 AUDIT_ARG(mach_port1, target_tport);
1504
1505 /* Always check if pid == 0 */
1506 if (pid == 0) {
1507 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1508 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1509 return KERN_FAILURE;
1510 }
1511
1512 t1 = port_name_to_task(target_tport);
1513 if (t1 == TASK_NULL) {
1514 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1515 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1516 return KERN_FAILURE;
1517 }
1518
1519 p = proc_find(pid);
1520 if (p == PROC_NULL) {
1521 error = KERN_FAILURE;
1522 goto tfpout;
1523 }
1524 pident = proc_ident(p);
1525 is_current_proc = (p == current_proc());
1526
1527 #if CONFIG_AUDIT
1528 AUDIT_ARG(process, p);
1529 #endif
1530
1531 if (!(task_for_pid_posix_check(p))) {
1532 error = KERN_FAILURE;
1533 goto tfpout;
1534 }
1535
1536 if (proc_task(p) == TASK_NULL) {
1537 error = KERN_SUCCESS;
1538 goto tfpout;
1539 }
1540
1541 /*
1542 * Grab a task reference and drop the proc reference before making any upcalls.
1543 */
1544 task = proc_task(p);
1545 task_reference(task);
1546
1547 proc_rele(p);
1548 p = PROC_NULL;
1549
1550 if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
1551 #if CONFIG_MACF
1552 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1553 if (error) {
1554 error = KERN_FAILURE;
1555 goto tfpout;
1556 }
1557 #endif
1558
1559 /* If we aren't root and target's task access port is set... */
1560 if (!kauth_cred_issuser(kauth_cred_get()) &&
1561 !is_current_proc &&
1562 (task_get_task_access_port(task, &tfpport) == 0) &&
1563 (tfpport != IPC_PORT_NULL)) {
1564 if (tfpport == IPC_PORT_DEAD) {
1565 error = KERN_PROTECTION_FAILURE;
1566 goto tfpout;
1567 }
1568
1569
1570 /* Call up to the task access server */
1571 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1572 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1573
1574 if (error != MACH_MSG_SUCCESS) {
1575 if (error == MACH_RCV_INTERRUPTED) {
1576 error = KERN_ABORTED;
1577 } else {
1578 error = KERN_FAILURE;
1579 }
1580 goto tfpout;
1581 }
1582 }
1583 }
1584
1585 /* Check if the task has been corpsified */
1586 if (is_corpsetask(task)) {
1587 error = KERN_FAILURE;
1588 goto tfpout;
1589 }
1590
1591 error = task_get_debug_control_port(task, &sright);
1592 if (error != KERN_SUCCESS) {
1593 goto tfpout;
1594 }
1595
1596 tret = ipc_port_copyout_send(
1597 sright,
1598 get_task_ipcspace(current_task()));
1599
1600 error = KERN_SUCCESS;
1601
1602 tfpout:
1603 task_deallocate(t1);
1604 AUDIT_ARG(mach_port2, tret);
1605 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1606
1607 if (tfpport != IPC_PORT_NULL) {
1608 ipc_port_release_send(tfpport);
1609 }
1610 if (task != TASK_NULL) {
1611 task_deallocate(task);
1612 }
1613 if (p != PROC_NULL) {
1614 proc_rele(p);
1615 }
1616 AUDIT_MACH_SYSCALL_EXIT(error);
1617 return error;
1618 }
1619
1620 kern_return_t
pid_resume(struct proc * p __unused,struct pid_resume_args * args,int * ret)1621 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1622 {
1623 task_t target = NULL;
1624 proc_t targetproc = PROC_NULL;
1625 int pid = args->pid;
1626 int error = 0;
1627 mach_port_t tfpport = MACH_PORT_NULL;
1628
1629 if (pid == 0) {
1630 error = EPERM;
1631 goto out;
1632 }
1633
1634 targetproc = proc_find(pid);
1635 if (targetproc == PROC_NULL) {
1636 error = ESRCH;
1637 goto out;
1638 }
1639
1640 if (!task_for_pid_posix_check(targetproc) &&
1641 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1642 error = EPERM;
1643 goto out;
1644 }
1645
1646 #if CONFIG_MACF
1647 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
1648 if (error) {
1649 error = EPERM;
1650 goto out;
1651 }
1652 #endif
1653
1654 target = proc_task(targetproc);
1655 #if XNU_TARGET_OS_OSX
1656 if (target != TASK_NULL) {
1657 /* If we aren't root and target's task access port is set... */
1658 if (!kauth_cred_issuser(kauth_cred_get()) &&
1659 targetproc != current_proc() &&
1660 (task_get_task_access_port(target, &tfpport) == 0) &&
1661 (tfpport != IPC_PORT_NULL)) {
1662 if (tfpport == IPC_PORT_DEAD) {
1663 error = EACCES;
1664 goto out;
1665 }
1666
1667 /* Call up to the task access server */
1668 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1669 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1670
1671 if (error != MACH_MSG_SUCCESS) {
1672 if (error == MACH_RCV_INTERRUPTED) {
1673 error = EINTR;
1674 } else {
1675 error = EPERM;
1676 }
1677 goto out;
1678 }
1679 }
1680 }
1681 #endif /* XNU_TARGET_OS_OSX */
1682
1683 #if !XNU_TARGET_OS_OSX
1684 #if SOCKETS
1685 resume_proc_sockets(targetproc);
1686 #endif /* SOCKETS */
1687 #endif /* !XNU_TARGET_OS_OSX */
1688
1689 task_reference(target);
1690
1691 #if CONFIG_MEMORYSTATUS
1692 memorystatus_on_resume(targetproc);
1693 #endif
1694
1695 error = task_pidresume(target);
1696 if (error) {
1697 if (error == KERN_INVALID_ARGUMENT) {
1698 error = EINVAL;
1699 } else {
1700 if (error == KERN_MEMORY_ERROR) {
1701 psignal(targetproc, SIGKILL);
1702 error = EIO;
1703 } else {
1704 error = EPERM;
1705 }
1706 }
1707 }
1708
1709 task_deallocate(target);
1710
1711 out:
1712 if (tfpport != IPC_PORT_NULL) {
1713 ipc_port_release_send(tfpport);
1714 }
1715
1716 if (targetproc != PROC_NULL) {
1717 proc_rele(targetproc);
1718 }
1719
1720 *ret = error;
1721 return error;
1722 }
1723
1724 #if !XNU_TARGET_OS_OSX
1725 /*
1726 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1727 * When a process is specified, this call is blocking, otherwise we wake up the
1728 * freezer thread and do not block on a process being frozen.
1729 */
1730 kern_return_t
pid_hibernate(struct proc * p __unused,struct pid_hibernate_args * args,int * ret)1731 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1732 {
1733 int error = 0;
1734 proc_t targetproc = PROC_NULL;
1735 int pid = args->pid;
1736
1737 #ifndef CONFIG_FREEZE
1738 #pragma unused(pid)
1739 #else
1740
1741 /*
1742 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1743 */
1744
1745 if (pid >= 0) {
1746 targetproc = proc_find(pid);
1747
1748 if (targetproc == PROC_NULL) {
1749 error = ESRCH;
1750 goto out;
1751 }
1752
1753 if (!task_for_pid_posix_check(targetproc)) {
1754 error = EPERM;
1755 goto out;
1756 }
1757 }
1758
1759 #if CONFIG_MACF
1760 //Note that targetproc may be null
1761 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
1762 if (error) {
1763 error = EPERM;
1764 goto out;
1765 }
1766 #endif
1767
1768 if (pid == -2) {
1769 vm_pageout_anonymous_pages();
1770 } else if (pid == -1) {
1771 memorystatus_on_inactivity(targetproc);
1772 } else {
1773 error = memorystatus_freeze_process_sync(targetproc);
1774 }
1775
1776 out:
1777
1778 #endif /* CONFIG_FREEZE */
1779
1780 if (targetproc != PROC_NULL) {
1781 proc_rele(targetproc);
1782 }
1783 *ret = error;
1784 return error;
1785 }
1786 #endif /* !XNU_TARGET_OS_OSX */
1787
1788 #if SOCKETS
1789 int
networking_memstatus_callout(proc_t p,uint32_t status)1790 networking_memstatus_callout(proc_t p, uint32_t status)
1791 {
1792 struct fileproc *fp;
1793
1794 /*
1795 * proc list lock NOT held
1796 * proc lock NOT held
1797 * a reference on the proc has been held / shall be dropped by the caller.
1798 */
1799 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1800 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1801
1802 proc_fdlock(p);
1803
1804 fdt_foreach(fp, p) {
1805 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1806 #if NECP
1807 case DTYPE_NETPOLICY:
1808 necp_fd_memstatus(p, status,
1809 (struct necp_fd_data *)fp_get_data(fp));
1810 break;
1811 #endif /* NECP */
1812 #if SKYWALK
1813 case DTYPE_CHANNEL:
1814 kern_channel_memstatus(p, status,
1815 (struct kern_channel *)fp_get_data(fp));
1816 break;
1817 #endif /* SKYWALK */
1818 default:
1819 break;
1820 }
1821 }
1822 proc_fdunlock(p);
1823
1824 return 1;
1825 }
1826
1827 #if SKYWALK
1828 /*
1829 * Since we make multiple passes across the fileproc array, record the
1830 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
 * large enough to accommodate most, if not all, cases. If we find more,
1832 * we'll go to the slow path during second pass.
1833 */
1834 #define MAX_CHANNELS 8 /* should be more than enough */
1835 #endif /* SKYWALK */
1836
1837 static int
networking_defunct_callout(proc_t p,void * arg)1838 networking_defunct_callout(proc_t p, void *arg)
1839 {
1840 struct pid_shutdown_sockets_args *args = arg;
1841 int pid = args->pid;
1842 int level = args->level;
1843 struct fileproc *fp;
1844 #if SKYWALK
1845 int i;
1846 int channel_count = 0;
1847 struct kern_channel *channel_array[MAX_CHANNELS];
1848
1849 bzero(&channel_array, sizeof(channel_array));
1850 #endif /* SKYWALK */
1851
1852 proc_fdlock(p);
1853
1854 fdt_foreach(fp, p) {
1855 struct fileglob *fg = fp->fp_glob;
1856
1857 switch (FILEGLOB_DTYPE(fg)) {
1858 case DTYPE_SOCKET: {
1859 struct socket *so = (struct socket *)fg_get_data(fg);
1860 if (proc_getpid(p) == pid || so->last_pid == pid ||
1861 ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
1862 /* Call networking stack with socket and level */
1863 (void)socket_defunct(p, so, level);
1864 }
1865 break;
1866 }
1867 #if NECP
1868 case DTYPE_NETPOLICY:
1869 /* first pass: defunct necp and get stats for ntstat */
1870 if (proc_getpid(p) == pid) {
1871 necp_fd_defunct(p,
1872 (struct necp_fd_data *)fg_get_data(fg));
1873 }
1874 break;
1875 #endif /* NECP */
1876 #if SKYWALK
1877 case DTYPE_CHANNEL:
1878 /* first pass: get channels and total count */
1879 if (proc_getpid(p) == pid) {
1880 if (channel_count < MAX_CHANNELS) {
1881 channel_array[channel_count] =
1882 (struct kern_channel *)fg_get_data(fg);
1883 }
1884 ++channel_count;
1885 }
1886 break;
1887 #endif /* SKYWALK */
1888 default:
1889 break;
1890 }
1891 }
1892
1893 #if SKYWALK
1894 /*
1895 * Second pass: defunct channels/flows (after NECP). Handle
1896 * the common case of up to MAX_CHANNELS count with fast path,
1897 * and traverse the fileproc array again only if we exceed it.
1898 */
1899 if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
1900 ASSERT(proc_getpid(p) == pid);
1901 for (i = 0; i < channel_count; i++) {
1902 ASSERT(channel_array[i] != NULL);
1903 kern_channel_defunct(p, channel_array[i]);
1904 }
1905 } else if (channel_count != 0) {
1906 ASSERT(proc_getpid(p) == pid);
1907 fdt_foreach(fp, p) {
1908 struct fileglob *fg = fp->fp_glob;
1909
1910 if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
1911 kern_channel_defunct(p,
1912 (struct kern_channel *)fg_get_data(fg));
1913 }
1914 }
1915 }
1916 #endif /* SKYWALK */
1917 proc_fdunlock(p);
1918
1919 return PROC_RETURNED;
1920 }
1921
1922 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1923 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1924 {
1925 int error = 0;
1926 proc_t targetproc = PROC_NULL;
1927 int pid = args->pid;
1928 int level = args->level;
1929
1930 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1931 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1932 error = EINVAL;
1933 goto out;
1934 }
1935
1936 targetproc = proc_find(pid);
1937 if (targetproc == PROC_NULL) {
1938 error = ESRCH;
1939 goto out;
1940 }
1941
1942 if (!task_for_pid_posix_check(targetproc) &&
1943 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1944 error = EPERM;
1945 goto out;
1946 }
1947
1948 #if CONFIG_MACF
1949 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1950 if (error) {
1951 error = EPERM;
1952 goto out;
1953 }
1954 #endif
1955
1956 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1957 networking_defunct_callout, args, NULL, NULL);
1958
1959 out:
1960 if (targetproc != PROC_NULL) {
1961 proc_rele(targetproc);
1962 }
1963 *ret = error;
1964 return error;
1965 }
1966
1967 #endif /* SOCKETS */
1968
1969 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1970 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1971 __unused int arg2, struct sysctl_req *req)
1972 {
1973 int error = 0;
1974 int new_value;
1975
1976 error = SYSCTL_OUT(req, arg1, sizeof(int));
1977 if (error || req->newptr == USER_ADDR_NULL) {
1978 return error;
1979 }
1980
1981 if (!kauth_cred_issuser(kauth_cred_get())) {
1982 return EPERM;
1983 }
1984
1985 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1986 goto out;
1987 }
1988 if ((new_value == KERN_TFP_POLICY_DENY)
1989 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
1990 tfp_policy = new_value;
1991 } else {
1992 error = EINVAL;
1993 }
1994 out:
1995 return error;
1996 }
1997
/* 1 when built as a secure kernel (restricts tfp and similar facilities). */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

/* kern.secure_kernel: read-only report of the SECURE_KERNEL build flag. */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy, guarded by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: shared-region tracing/version/persistence knobs. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2016
2017 /*
2018 * shared_region_check_np:
2019 *
2020 * This system call is intended for dyld.
2021 *
2022 * dyld calls this when any process starts to see if the process's shared
2023 * region is already set up and ready to use.
2024 * This call returns the base address of the first mapping in the
2025 * process's shared region's first mapping.
2026 * dyld will then check what's mapped at that address.
2027 *
2028 * If the shared region is empty, dyld will then attempt to map the shared
2029 * cache file in the shared region via the shared_region_map_np() system call.
2030 *
2031 * If something's already mapped in the shared region, dyld will check if it
2032 * matches the shared cache it would like to use for that process.
2033 * If it matches, evrything's ready and the process can proceed and use the
2034 * shared region.
2035 * If it doesn't match, dyld will unmap the shared region and map the shared
2036 * cache into the process's address space via mmap().
2037 *
2038 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2039 * the shared region. We will remove the shared_region reference from the task.
2040 *
2041 * ERROR VALUES
2042 * EINVAL no shared region
2043 * ENOMEM shared region is empty
2044 * EFAULT bad address for "start_address"
2045 */
2046 int
shared_region_check_np(__unused struct proc * p,struct shared_region_check_np_args * uap,__unused int * retvalp)2047 shared_region_check_np(
2048 __unused struct proc *p,
2049 struct shared_region_check_np_args *uap,
2050 __unused int *retvalp)
2051 {
2052 vm_shared_region_t shared_region;
2053 mach_vm_offset_t start_address = 0;
2054 int error = 0;
2055 kern_return_t kr;
2056 task_t task = current_task();
2057
2058 SHARED_REGION_TRACE_DEBUG(
2059 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
2060 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2061 proc_getpid(p), p->p_comm,
2062 (uint64_t)uap->start_address));
2063
2064 /*
2065 * Special value of start_address used to indicate that map_with_linking() should
2066 * no longer be allowed in this process
2067 */
2068 if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
2069 p->p_disallow_map_with_linking = TRUE;
2070 return 0;
2071 }
2072
2073 /* retrieve the current tasks's shared region */
2074 shared_region = vm_shared_region_get(task);
2075 if (shared_region != NULL) {
2076 /*
2077 * A NULL argument is used by dyld to indicate the task
2078 * has unmapped its shared region.
2079 */
2080 if (uap->start_address == 0) {
2081 /* unmap it first */
2082 vm_shared_region_remove(task, shared_region);
2083 vm_shared_region_set(task, NULL);
2084 } else {
2085 /* retrieve address of its first mapping... */
2086 kr = vm_shared_region_start_address(shared_region, &start_address, task);
2087 if (kr != KERN_SUCCESS) {
2088 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2089 "check_np(0x%llx) "
2090 "vm_shared_region_start_address() failed\n",
2091 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2092 proc_getpid(p), p->p_comm,
2093 (uint64_t)uap->start_address));
2094 error = ENOMEM;
2095 } else {
2096 #if __has_feature(ptrauth_calls)
2097 /*
2098 * Remap any section of the shared library that
2099 * has authenticated pointers into private memory.
2100 */
2101 if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
2102 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2103 "check_np(0x%llx) "
2104 "vm_shared_region_auth_remap() failed\n",
2105 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2106 proc_getpid(p), p->p_comm,
2107 (uint64_t)uap->start_address));
2108 error = ENOMEM;
2109 }
2110 #endif /* __has_feature(ptrauth_calls) */
2111
2112 /* ... and give it to the caller */
2113 if (error == 0) {
2114 error = copyout(&start_address,
2115 (user_addr_t) uap->start_address,
2116 sizeof(start_address));
2117 if (error != 0) {
2118 SHARED_REGION_TRACE_ERROR(
2119 ("shared_region: %p [%d(%s)] "
2120 "check_np(0x%llx) "
2121 "copyout(0x%llx) error %d\n",
2122 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2123 proc_getpid(p), p->p_comm,
2124 (uint64_t)uap->start_address, (uint64_t)start_address,
2125 error));
2126 }
2127 }
2128 }
2129 }
2130 vm_shared_region_deallocate(shared_region);
2131 } else {
2132 /* no shared region ! */
2133 error = EINVAL;
2134 }
2135
2136 SHARED_REGION_TRACE_DEBUG(
2137 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
2138 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2139 proc_getpid(p), p->p_comm,
2140 (uint64_t)uap->start_address, (uint64_t)start_address, error));
2141
2142 return error;
2143 }
2144
2145
2146 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2147 shared_region_copyin(
2148 struct proc *p,
2149 user_addr_t user_addr,
2150 unsigned int count,
2151 unsigned int element_size,
2152 void *kernel_data)
2153 {
2154 int error = 0;
2155 vm_size_t size = count * element_size;
2156
2157 error = copyin(user_addr, kernel_data, size);
2158 if (error) {
2159 SHARED_REGION_TRACE_ERROR(
2160 ("shared_region: %p [%d(%s)] map(): "
2161 "copyin(0x%llx, %ld) failed (error=%d)\n",
2162 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2163 proc_getpid(p), p->p_comm,
2164 (uint64_t)user_addr, (long)size, error));
2165 }
2166 return error;
2167 }
2168
2169 /*
2170 * A reasonable upper limit to prevent overflow of allocation/copyin.
2171 */
2172 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2173
2174 /* forward declaration */
2175 __attribute__((noinline))
2176 static void shared_region_map_and_slide_cleanup(
2177 struct proc *p,
2178 uint32_t files_count,
2179 struct _sr_file_mappings *sr_file_mappings,
2180 struct vm_shared_region *shared_region);
2181
2182 /*
2183 * Setup part of _shared_region_map_and_slide().
2184 * It had to be broken out of _shared_region_map_and_slide() to
2185 * prevent compiler inlining from blowing out the stack.
2186 */
2187 __attribute__((noinline))
2188 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode * rdir_vp)2189 shared_region_map_and_slide_setup(
2190 struct proc *p,
2191 uint32_t files_count,
2192 struct shared_file_np *files,
2193 uint32_t mappings_count,
2194 struct shared_file_mapping_slide_np *mappings,
2195 struct _sr_file_mappings **sr_file_mappings,
2196 struct vm_shared_region **shared_region_ptr,
2197 struct vnode *rdir_vp)
2198 {
2199 int error = 0;
2200 struct _sr_file_mappings *srfmp;
2201 uint32_t mappings_next;
2202 struct vnode_attr va;
2203 off_t fs;
2204 #if CONFIG_MACF
2205 vm_prot_t maxprot = VM_PROT_ALL;
2206 #endif
2207 uint32_t i;
2208 struct vm_shared_region *shared_region = NULL;
2209 boolean_t is_driverkit = task_is_driver(current_task());
2210
2211 SHARED_REGION_TRACE_DEBUG(
2212 ("shared_region: %p [%d(%s)] -> map\n",
2213 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2214 proc_getpid(p), p->p_comm));
2215
2216 if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2217 error = E2BIG;
2218 goto done;
2219 }
2220 if (files_count == 0) {
2221 error = EINVAL;
2222 goto done;
2223 }
2224 *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2225 Z_WAITOK | Z_ZERO);
2226 if (*sr_file_mappings == NULL) {
2227 error = ENOMEM;
2228 goto done;
2229 }
2230 mappings_next = 0;
2231 for (i = 0; i < files_count; i++) {
2232 srfmp = &(*sr_file_mappings)[i];
2233 srfmp->fd = files[i].sf_fd;
2234 srfmp->mappings_count = files[i].sf_mappings_count;
2235 srfmp->mappings = &mappings[mappings_next];
2236 mappings_next += srfmp->mappings_count;
2237 if (mappings_next > mappings_count) {
2238 error = EINVAL;
2239 goto done;
2240 }
2241 srfmp->slide = files[i].sf_slide;
2242 }
2243
2244 /* get the process's shared region (setup in vm_map_exec()) */
2245 shared_region = vm_shared_region_trim_and_get(current_task());
2246 *shared_region_ptr = shared_region;
2247 if (shared_region == NULL) {
2248 SHARED_REGION_TRACE_ERROR(
2249 ("shared_region: %p [%d(%s)] map(): "
2250 "no shared region\n",
2251 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2252 proc_getpid(p), p->p_comm));
2253 error = EINVAL;
2254 goto done;
2255 }
2256
2257 /*
2258 * Check the shared region matches the current root
2259 * directory of this process. Deny the mapping to
2260 * avoid tainting the shared region with something that
2261 * doesn't quite belong into it.
2262 */
2263 struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2264 if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2265 SHARED_REGION_TRACE_ERROR(
2266 ("shared_region: map(%p) root_dir mismatch\n",
2267 (void *)VM_KERNEL_ADDRPERM(current_thread())));
2268 error = EPERM;
2269 goto done;
2270 }
2271
2272
2273 for (srfmp = &(*sr_file_mappings)[0];
2274 srfmp < &(*sr_file_mappings)[files_count];
2275 srfmp++) {
2276 if (srfmp->mappings_count == 0) {
2277 /* no mappings here... */
2278 continue;
2279 }
2280
2281 /*
2282 * A file descriptor of -1 is used to indicate that the data
2283 * to be put in the shared region for this mapping comes directly
2284 * from the processes address space. Ensure we have proper alignments.
2285 */
2286 if (srfmp->fd == -1) {
2287 /* only allow one mapping per fd */
2288 if (srfmp->mappings_count > 1) {
2289 SHARED_REGION_TRACE_ERROR(
2290 ("shared_region: %p [%d(%s)] map data >1 mapping\n",
2291 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2292 proc_getpid(p), p->p_comm));
2293 error = EINVAL;
2294 goto done;
2295 }
2296
2297 /*
2298 * The destination address and size must be page aligned.
2299 */
2300 struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2301 mach_vm_address_t dest_addr = mapping->sms_address;
2302 mach_vm_size_t map_size = mapping->sms_size;
2303 if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2304 SHARED_REGION_TRACE_ERROR(
2305 ("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2306 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2307 proc_getpid(p), p->p_comm, dest_addr));
2308 error = EINVAL;
2309 goto done;
2310 }
2311 if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2312 SHARED_REGION_TRACE_ERROR(
2313 ("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2314 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2315 proc_getpid(p), p->p_comm, map_size));
2316 error = EINVAL;
2317 goto done;
2318 }
2319 continue;
2320 }
2321
2322 /* get file structure from file descriptor */
2323 error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2324 if (error) {
2325 SHARED_REGION_TRACE_ERROR(
2326 ("shared_region: %p [%d(%s)] map: "
2327 "fd=%d lookup failed (error=%d)\n",
2328 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2329 proc_getpid(p), p->p_comm, srfmp->fd, error));
2330 goto done;
2331 }
2332
2333 /* we need at least read permission on the file */
2334 if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2335 SHARED_REGION_TRACE_ERROR(
2336 ("shared_region: %p [%d(%s)] map: "
2337 "fd=%d not readable\n",
2338 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2339 proc_getpid(p), p->p_comm, srfmp->fd));
2340 error = EPERM;
2341 goto done;
2342 }
2343
2344 /* get vnode from file structure */
2345 error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2346 if (error) {
2347 SHARED_REGION_TRACE_ERROR(
2348 ("shared_region: %p [%d(%s)] map: "
2349 "fd=%d getwithref failed (error=%d)\n",
2350 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2351 proc_getpid(p), p->p_comm, srfmp->fd, error));
2352 goto done;
2353 }
2354 srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2355
2356 /* make sure the vnode is a regular file */
2357 if (srfmp->vp->v_type != VREG) {
2358 SHARED_REGION_TRACE_ERROR(
2359 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2360 "not a file (type=%d)\n",
2361 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2362 proc_getpid(p), p->p_comm,
2363 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2364 srfmp->vp->v_name, srfmp->vp->v_type));
2365 error = EINVAL;
2366 goto done;
2367 }
2368
2369 #if CONFIG_MACF
2370 /* pass in 0 for the offset argument because AMFI does not need the offset
2371 * of the shared cache */
2372 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2373 srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2374 if (error) {
2375 goto done;
2376 }
2377 #endif /* MAC */
2378
2379 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2380 /*
2381 * Check if the shared cache is in the trust cache;
2382 * if so, we can skip the root ownership check.
2383 */
2384 #if DEVELOPMENT || DEBUG
2385 /*
2386 * Skip both root ownership and trust cache check if
2387 * enforcement is disabled.
2388 */
2389 if (!cs_system_enforcement()) {
2390 goto after_root_check;
2391 }
2392 #endif /* DEVELOPMENT || DEBUG */
2393 struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2394 if (blob == NULL) {
2395 SHARED_REGION_TRACE_ERROR(
2396 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2397 "missing CS blob\n",
2398 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2399 proc_getpid(p), p->p_comm,
2400 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2401 srfmp->vp->v_name));
2402 goto root_check;
2403 }
2404 const uint8_t *cdhash = csblob_get_cdhash(blob);
2405 if (cdhash == NULL) {
2406 SHARED_REGION_TRACE_ERROR(
2407 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2408 "missing cdhash\n",
2409 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2410 proc_getpid(p), p->p_comm,
2411 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2412 srfmp->vp->v_name));
2413 goto root_check;
2414 }
2415
2416 bool in_trust_cache = false;
2417 TrustCacheQueryToken_t qt;
2418 if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2419 TCType_t tc_type = kTCTypeInvalid;
2420 TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2421 in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2422 (tc_type == kTCTypeCryptex1BootOS ||
2423 tc_type == kTCTypeStatic ||
2424 tc_type == kTCTypeEngineering));
2425 }
2426 if (!in_trust_cache) {
2427 SHARED_REGION_TRACE_ERROR(
2428 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2429 "not in trust cache\n",
2430 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2431 proc_getpid(p), p->p_comm,
2432 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2433 srfmp->vp->v_name));
2434 goto root_check;
2435 }
2436 goto after_root_check;
2437 root_check:
2438 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2439
2440 /* The shared cache file must be owned by root */
2441 VATTR_INIT(&va);
2442 VATTR_WANTED(&va, va_uid);
2443 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2444 if (error) {
2445 SHARED_REGION_TRACE_ERROR(
2446 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2447 "vnode_getattr(%p) failed (error=%d)\n",
2448 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2449 proc_getpid(p), p->p_comm,
2450 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2451 srfmp->vp->v_name,
2452 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2453 error));
2454 goto done;
2455 }
2456 if (va.va_uid != 0) {
2457 SHARED_REGION_TRACE_ERROR(
2458 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2459 "owned by uid=%d instead of 0\n",
2460 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2461 proc_getpid(p), p->p_comm,
2462 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2463 srfmp->vp->v_name, va.va_uid));
2464 error = EPERM;
2465 goto done;
2466 }
2467
2468 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2469 after_root_check:
2470 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2471
2472 #if CONFIG_CSR
2473 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2474 VATTR_INIT(&va);
2475 VATTR_WANTED(&va, va_flags);
2476 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2477 if (error) {
2478 SHARED_REGION_TRACE_ERROR(
2479 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2480 "vnode_getattr(%p) failed (error=%d)\n",
2481 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2482 proc_getpid(p), p->p_comm,
2483 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2484 srfmp->vp->v_name,
2485 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2486 error));
2487 goto done;
2488 }
2489
2490 if (!(va.va_flags & SF_RESTRICTED)) {
2491 /*
2492 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2493 * the shared cache file is NOT SIP-protected, so reject the
2494 * mapping request
2495 */
2496 SHARED_REGION_TRACE_ERROR(
2497 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
2498 "vnode is not SIP-protected. \n",
2499 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2500 proc_getpid(p), p->p_comm,
2501 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2502 srfmp->vp->v_name));
2503 error = EPERM;
2504 goto done;
2505 }
2506 }
2507 #else /* CONFIG_CSR */
2508
2509 /*
2510 * Devices without SIP/ROSP need to make sure that the shared cache
2511 * is either on the root volume or in the preboot cryptex volume.
2512 */
2513 assert(rdir_vp != NULL);
2514 if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2515 vnode_t preboot_vp = NULL;
2516 #if XNU_TARGET_OS_OSX
2517 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2518 #else
2519 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2520 #endif
2521 error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2522 if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2523 SHARED_REGION_TRACE_ERROR(
2524 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2525 "not on process' root volume nor preboot volume\n",
2526 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2527 proc_getpid(p), p->p_comm,
2528 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2529 srfmp->vp->v_name));
2530 error = EPERM;
2531 if (preboot_vp) {
2532 (void)vnode_put(preboot_vp);
2533 }
2534 goto done;
2535 } else if (preboot_vp) {
2536 (void)vnode_put(preboot_vp);
2537 }
2538 }
2539 #endif /* CONFIG_CSR */
2540
2541 if (scdir_enforce) {
2542 char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2543 struct vnode *scdir_vp = NULL;
2544 for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2545 *expected_scdir_path != NULL;
2546 expected_scdir_path++) {
2547 /* get vnode for expected_scdir_path */
2548 error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2549 if (error) {
2550 SHARED_REGION_TRACE_ERROR(
2551 ("shared_region: %p [%d(%s)]: "
2552 "vnode_lookup(%s) failed (error=%d)\n",
2553 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2554 proc_getpid(p), p->p_comm,
2555 *expected_scdir_path, error));
2556 continue;
2557 }
2558
2559 /* check if parent is scdir_vp */
2560 assert(scdir_vp != NULL);
2561 if (vnode_parent(srfmp->vp) == scdir_vp) {
2562 (void)vnode_put(scdir_vp);
2563 scdir_vp = NULL;
2564 goto scdir_ok;
2565 }
2566 (void)vnode_put(scdir_vp);
2567 scdir_vp = NULL;
2568 }
2569 /* nothing matches */
2570 SHARED_REGION_TRACE_ERROR(
2571 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2572 "shared cache file not in expected directory\n",
2573 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2574 proc_getpid(p), p->p_comm,
2575 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2576 srfmp->vp->v_name));
2577 error = EPERM;
2578 goto done;
2579 }
2580 scdir_ok:
2581
2582 /* get vnode size */
2583 error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2584 if (error) {
2585 SHARED_REGION_TRACE_ERROR(
2586 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2587 "vnode_size(%p) failed (error=%d)\n",
2588 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2589 proc_getpid(p), p->p_comm,
2590 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2591 srfmp->vp->v_name,
2592 (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2593 goto done;
2594 }
2595 srfmp->file_size = fs;
2596
2597 /* get the file's memory object handle */
2598 srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2599 if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2600 SHARED_REGION_TRACE_ERROR(
2601 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2602 "no memory object\n",
2603 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2604 proc_getpid(p), p->p_comm,
2605 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2606 srfmp->vp->v_name));
2607 error = EINVAL;
2608 goto done;
2609 }
2610
2611 /* check that the mappings are properly covered by code signatures */
2612 if (!cs_system_enforcement()) {
2613 /* code signing is not enforced: no need to check */
2614 } else {
2615 for (i = 0; i < srfmp->mappings_count; i++) {
2616 if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2617 /* zero-filled mapping: not backed by the file */
2618 continue;
2619 }
2620 if (ubc_cs_is_range_codesigned(srfmp->vp,
2621 srfmp->mappings[i].sms_file_offset,
2622 srfmp->mappings[i].sms_size)) {
2623 /* this mapping is fully covered by code signatures */
2624 continue;
2625 }
2626 SHARED_REGION_TRACE_ERROR(
2627 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2628 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2629 "is not code-signed\n",
2630 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2631 proc_getpid(p), p->p_comm,
2632 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2633 srfmp->vp->v_name,
2634 i, srfmp->mappings_count,
2635 srfmp->mappings[i].sms_address,
2636 srfmp->mappings[i].sms_size,
2637 srfmp->mappings[i].sms_file_offset,
2638 srfmp->mappings[i].sms_max_prot,
2639 srfmp->mappings[i].sms_init_prot));
2640 error = EINVAL;
2641 goto done;
2642 }
2643 }
2644 }
2645 done:
2646 if (error != 0) {
2647 shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2648 *sr_file_mappings = NULL;
2649 *shared_region_ptr = NULL;
2650 }
2651 return error;
2652 }
2653
2654 /*
2655 * shared_region_map_np()
2656 *
2657 * This system call is intended for dyld.
2658 *
2659 * dyld uses this to map a shared cache file into a shared region.
2660 * This is usually done only the first time a shared cache is needed.
2661 * Subsequent processes will just use the populated shared region without
2662 * requiring any further setup.
2663 */
/*
 * Map the given files into the task's shared region.
 * On success, the shared region submap is populated via
 * vm_shared_region_map_file() and P_NOSHLIB is cleared.
 * All references taken by the setup step are released by
 * shared_region_map_and_slide_cleanup() on every path
 * (setup cleans up after itself when it fails).
 */
static int
_shared_region_map_and_slide(
	struct proc *p,
	uint32_t files_count,
	struct shared_file_np *files,
	uint32_t mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int error = 0;
	kern_return_t kr = KERN_SUCCESS;
	struct _sr_file_mappings *sr_file_mappings = NULL;
	struct vnode *rdir_vp = NULL;
	struct vm_shared_region *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	/* take an iocount while still under the fd lock, so rdir_vp stays valid */
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		/* setup already released its own references on failure */
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	/* drop our root-dir hold and all setup references (success path too) */
	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
2749
2750 /*
2751 * Clean up part of _shared_region_map_and_slide()
2752 * It had to be broken out of _shared_region_map_and_slide() to
2753 * prevent compiler inlining from blowing out the stack.
2754 */
/*
 * Release everything shared_region_map_and_slide_setup() acquired:
 * per-file vnode iocounts and fileproc references, the sr_file_mappings
 * array itself, and the shared region reference. Also marks each mapped
 * vnode (and any resource-fork namedstream) with VSHARED_DYLD and
 * refreshes its access time. Safe to call with NULL sr_file_mappings
 * and/or NULL shared_region.
 */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				/* tag the vnode as backing a dyld shared cache */
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated for
				 * it. That namedstream vnode will also
				 * have to be marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (shared_region != NULL) {
		/* drop the reference taken by vm_shared_region_trim_and_get() */
		vm_shared_region_deallocate(shared_region);
	}
}
2819
2820
2821 /*
2822 * For each file mapped, we may have mappings for:
2823 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2824 * so let's round up to 8 mappings per file.
2825 */
2826 #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
2827
2828 /*
2829 * This is the new interface for setting up shared region mappings.
2830 *
2831 * The slide used for shared regions setup using this interface is done differently
2832 * from the old interface. The slide value passed in the shared_files_np represents
2833 * a max value. The kernel will choose a random value based on that, then use it
2834 * for all shared regions.
2835 */
2836 #if defined (__x86_64__)
2837 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2838 #else
2839 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2840 #endif
2841
/*
 * Syscall entry point (intended for dyld): copy in the file and mapping
 * descriptors, pick one random slide for all shared regions based on the
 * max slide in shared_files[0], apply it to every mapping, then hand off
 * to _shared_region_map_and_slide().
 *
 * NOTE(review): "kr" mixes value domains — shared_region_copyin() and
 * _shared_region_map_and_slide() return errno values while the bounds
 * checks below store kern_return_t constants, and "kr" is returned as
 * the syscall result. Both domains use 0 for success, so the checks
 * work, but non-zero kern_return_t values reach user space as errnos —
 * confirm before relying on specific error codes.
 */
int
shared_region_map_and_slide_2_np(
	struct proc *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int *retvalp)
{
	unsigned int files_count;
	struct shared_file_np *shared_files = NULL;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* bound the user-supplied counts before allocating */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	/* copy in the file and mapping descriptors from user space */
	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * sf_slide on input is a *maximum* slide; pick a random page-aligned
	 * value below it. The first file's max is used for all files.
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		/* record the chosen slide; setup/map consume it downstream */
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* kfree_data() is a no-op on NULL pointers */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2965
2966 /*
2967 * A syscall for dyld to use to map data pages that need load time relocation fixups.
2968 * The fixups are performed by a custom pager during page-in, so the pages still appear
2969 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
2970 * on demand later, all w/o using the compressor.
2971 *
 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
2973 * running, they are COW'd as normal.
2974 */
2975 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)2976 map_with_linking_np(
2977 struct proc *p,
2978 struct map_with_linking_np_args *uap,
2979 __unused int *retvalp)
2980 {
2981 uint32_t region_count;
2982 uint32_t r;
2983 struct mwl_region *regions = NULL;
2984 struct mwl_region *rp;
2985 uint32_t link_info_size;
2986 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
2987 struct mwl_info_hdr *info_hdr = NULL;
2988 uint64_t binds_size;
2989 int fd;
2990 struct fileproc *fp = NULL;
2991 struct vnode *vp = NULL;
2992 size_t file_size;
2993 off_t fs;
2994 struct vnode_attr va;
2995 memory_object_control_t file_control = NULL;
2996 int error;
2997 kern_return_t kr = KERN_SUCCESS;
2998
2999 /*
3000 * Check if dyld has told us it finished with this call.
3001 */
3002 if (p->p_disallow_map_with_linking) {
3003 printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3004 __func__, proc_getpid(p), p->p_comm);
3005 kr = KERN_FAILURE;
3006 goto done;
3007 }
3008
3009 /*
3010 * First we do some sanity checking on what dyld has passed us.
3011 */
3012 region_count = uap->region_count;
3013 link_info_size = uap->link_info_size;
3014 if (region_count == 0) {
3015 printf("%s: [%d(%s)]: region_count == 0\n",
3016 __func__, proc_getpid(p), p->p_comm);
3017 kr = KERN_FAILURE;
3018 goto done;
3019 }
3020 if (region_count > MWL_MAX_REGION_COUNT) {
3021 printf("%s: [%d(%s)]: region_count too big %d\n",
3022 __func__, proc_getpid(p), p->p_comm, region_count);
3023 kr = KERN_FAILURE;
3024 goto done;
3025 }
3026
3027 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3028 printf("%s: [%d(%s)]: link_info_size too small\n",
3029 __func__, proc_getpid(p), p->p_comm);
3030 kr = KERN_FAILURE;
3031 goto done;
3032 }
3033 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3034 printf("%s: [%d(%s)]: link_info_size too big %d\n",
3035 __func__, proc_getpid(p), p->p_comm, link_info_size);
3036 kr = KERN_FAILURE;
3037 goto done;
3038 }
3039
3040 /*
3041 * Allocate and copyin the regions and link info
3042 */
3043 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3044 if (regions == NULL) {
3045 printf("%s: [%d(%s)]: failed to allocate regions\n",
3046 __func__, proc_getpid(p), p->p_comm);
3047 kr = KERN_RESOURCE_SHORTAGE;
3048 goto done;
3049 }
3050 kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3051 if (kr != KERN_SUCCESS) {
3052 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3053 __func__, proc_getpid(p), p->p_comm, kr);
3054 goto done;
3055 }
3056
3057 link_info = kalloc_data(link_info_size, Z_WAITOK);
3058 if (link_info == NULL) {
3059 printf("%s: [%d(%s)]: failed to allocate link_info\n",
3060 __func__, proc_getpid(p), p->p_comm);
3061 kr = KERN_RESOURCE_SHORTAGE;
3062 goto done;
3063 }
3064 kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3065 if (kr != KERN_SUCCESS) {
3066 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3067 __func__, proc_getpid(p), p->p_comm, kr);
3068 goto done;
3069 }
3070
3071 /*
3072 * Do some verification the data structures.
3073 */
3074 info_hdr = (struct mwl_info_hdr *)link_info;
3075 if (info_hdr->mwli_version != MWL_INFO_VERS) {
3076 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3077 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3078 kr = KERN_FAILURE;
3079 goto done;
3080 }
3081
3082 if (info_hdr->mwli_binds_offset > link_info_size) {
3083 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3084 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3085 kr = KERN_FAILURE;
3086 goto done;
3087 }
3088
3089 /* some older devs have s/w page size > h/w page size, no need to support them */
3090 if (info_hdr->mwli_page_size != PAGE_SIZE) {
3091 /* no printf, since this is expected on some devices */
3092 kr = KERN_INVALID_ARGUMENT;
3093 goto done;
3094 }
3095
3096 binds_size = (uint64_t)info_hdr->mwli_binds_count *
3097 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3098 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3099 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3100 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3101 kr = KERN_FAILURE;
3102 goto done;
3103 }
3104
3105 if (info_hdr->mwli_chains_offset > link_info_size) {
3106 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3107 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3108 kr = KERN_FAILURE;
3109 goto done;
3110 }
3111
3112
3113 /*
3114 * Ensure the chained starts in the link info and make sure the
3115 * segment info offsets are within bounds.
3116 */
3117 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3118 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3119 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3120 kr = KERN_FAILURE;
3121 goto done;
3122 }
3123 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3124 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3125 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3126 kr = KERN_FAILURE;
3127 goto done;
3128 }
3129
3130 /* Note that more verification of offsets is done in the pager itself */
3131
3132 /*
3133 * Ensure we've only been given one FD and verify valid protections.
3134 */
3135 fd = regions[0].mwlr_fd;
3136 for (r = 0; r < region_count; ++r) {
3137 if (regions[r].mwlr_fd != fd) {
3138 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3139 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3140 kr = KERN_FAILURE;
3141 goto done;
3142 }
3143 regions[r].mwlr_protections &= VM_PROT_ALL;
3144 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3145 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3146 __func__, proc_getpid(p), p->p_comm);
3147 kr = KERN_FAILURE;
3148 goto done;
3149 }
3150 }
3151
3152
3153 /* get file structure from file descriptor */
3154 error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3155 if (error) {
3156 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3157 __func__, proc_getpid(p), p->p_comm, error);
3158 kr = KERN_FAILURE;
3159 goto done;
3160 }
3161
3162 /* We need at least read permission on the file */
3163 if (!(fp->fp_glob->fg_flag & FREAD)) {
3164 printf("%s: [%d(%s)]: not readable\n",
3165 __func__, proc_getpid(p), p->p_comm);
3166 kr = KERN_FAILURE;
3167 goto done;
3168 }
3169
3170 /* Get the vnode from file structure */
3171 vp = (struct vnode *)fp_get_data(fp);
3172 error = vnode_getwithref(vp);
3173 if (error) {
3174 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3175 __func__, proc_getpid(p), p->p_comm, error);
3176 kr = KERN_FAILURE;
3177 vp = NULL; /* just to be sure */
3178 goto done;
3179 }
3180
3181 /* Make sure the vnode is a regular file */
3182 if (vp->v_type != VREG) {
3183 printf("%s: [%d(%s)]: vnode not VREG\n",
3184 __func__, proc_getpid(p), p->p_comm);
3185 kr = KERN_FAILURE;
3186 goto done;
3187 }
3188
3189 /* get vnode size */
3190 error = vnode_size(vp, &fs, vfs_context_current());
3191 if (error) {
3192 goto done;
3193 }
3194 file_size = fs;
3195
3196 /* get the file's memory object handle */
3197 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3198 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3199 printf("%s: [%d(%s)]: no memory object\n",
3200 __func__, proc_getpid(p), p->p_comm);
3201 kr = KERN_FAILURE;
3202 goto done;
3203 }
3204
3205 for (r = 0; r < region_count; ++r) {
3206 rp = ®ions[r];
3207
3208 /*
3209 * Only allow data mappings and not zero fill.
3210 */
3211 if (rp->mwlr_protections & VM_PROT_ZF) {
3212 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF\n",
3213 __func__, proc_getpid(p), p->p_comm, r);
3214 kr = KERN_FAILURE;
3215 goto done;
3216 }
3217 if (rp->mwlr_protections & VM_PROT_EXECUTE) {
3218 printf("%s: [%d(%s)]: region %d, found VM_PROT_EXECUTE\n",
3219 __func__, proc_getpid(p), p->p_comm, r);
3220 kr = KERN_FAILURE;
3221 goto done;
3222 }
3223
3224 #if CONFIG_MACF
3225 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3226 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3227 fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3228 if (error) {
3229 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3230 __func__, proc_getpid(p), p->p_comm, r, error);
3231 kr = KERN_FAILURE;
3232 goto done;
3233 }
3234 #endif /* MAC */
3235
3236 /* check that the mappings are properly covered by code signatures */
3237 if (cs_system_enforcement()) {
3238 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3239 printf("%s: [%d(%s)]: region %d, not code signed\n",
3240 __func__, proc_getpid(p), p->p_comm, r);
3241 kr = KERN_FAILURE;
3242 goto done;
3243 }
3244 }
3245 }
3246
3247 /* update the vnode's access time */
3248 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3249 VATTR_INIT(&va);
3250 nanotime(&va.va_access_time);
3251 VATTR_SET_ACTIVE(&va, va_access_time);
3252 vnode_setattr(vp, &va, vfs_context_current());
3253 }
3254
3255 /* get the VM to do the work */
3256 kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3257
3258 done:
3259 if (fp != NULL) {
3260 /* release the file descriptor */
3261 fp_drop(p, fd, fp, 0);
3262 }
3263 if (vp != NULL) {
3264 (void)vnode_put(vp);
3265 }
3266 if (regions != NULL) {
3267 kfree_data(regions, region_count * sizeof(regions[0]));
3268 }
3269 /* link info is used in the pager if things worked */
3270 if (link_info != NULL && kr != KERN_SUCCESS) {
3271 kfree_data(link_info, link_info_size);
3272 }
3273
3274 switch (kr) {
3275 case KERN_SUCCESS:
3276 return 0;
3277 case KERN_RESOURCE_SHORTAGE:
3278 return ENOMEM;
3279 default:
3280 return EINVAL;
3281 }
3282 }
3283
#if DEBUG || DEVELOPMENT
/* Live dyld-pager instance count and its high-water mark (debug kernels only). */
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

/* Software page size as seen by userspace (vm.pagesize). */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3305
3306 static int
3307 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3308 {
3309 #pragma unused(oidp, arg1, arg2)
3310 unsigned int page_free_wanted;
3311
3312 page_free_wanted = mach_vm_ctl_page_free_wanted();
3313 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3314 }
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

/* Purgeable-memory accounting. */
extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");

#if DEVELOPMENT || DEBUG
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

/* Pageout "cleaned page" debug counters (development/debug kernels only). */
SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

/* Reusable-page (madvise REUSABLE/REUSE) statistics. */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");


/* Page-queue sizes. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3439
#if defined (__x86_64__)
/* Free-page "clump" allocator statistics (x86_64 only). */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
/* Histogram of allocation clump sizes, indexed 1..16. */
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */
3468
#if CONFIG_SECLUDED_MEMORY

/* Secluded-memory pool accounting (targets, occupancy, grab outcomes). */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3495
3496 #include <kern/thread.h>
3497 #include <sys/user.h>
3498
3499 void vm_pageout_io_throttle(void);
3500
3501 void
vm_pageout_io_throttle(void)3502 vm_pageout_io_throttle(void)
3503 {
3504 struct uthread *uthread = current_uthread();
3505
3506 /*
3507 * thread is marked as a low priority I/O type
3508 * and the I/O we issued while in this cleaning operation
3509 * collided with normal I/O operations... we'll
3510 * delay in order to mitigate the impact of this
3511 * task on the normal operation of the system
3512 */
3513
3514 if (uthread->uu_lowpri_window) {
3515 throttle_lowpri_io(1);
3516 }
3517 }
3518
3519 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3520 vm_pressure_monitor(
3521 __unused struct proc *p,
3522 struct vm_pressure_monitor_args *uap,
3523 int *retval)
3524 {
3525 kern_return_t kr;
3526 uint32_t pages_reclaimed;
3527 uint32_t pages_wanted;
3528
3529 kr = mach_vm_pressure_monitor(
3530 (boolean_t) uap->wait_for_pressure,
3531 uap->nsecs_monitored,
3532 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3533 &pages_wanted);
3534
3535 switch (kr) {
3536 case KERN_SUCCESS:
3537 break;
3538 case KERN_ABORTED:
3539 return EINTR;
3540 default:
3541 return EINVAL;
3542 }
3543
3544 if (uap->pages_reclaimed) {
3545 if (copyout((void *)&pages_reclaimed,
3546 uap->pages_reclaimed,
3547 sizeof(pages_reclaimed)) != 0) {
3548 return EFAULT;
3549 }
3550 }
3551
3552 *retval = (int) pages_wanted;
3553 return 0;
3554 }
3555
/*
 * kas_info() system call.
 *
 * Exports kernel address-space layout information (the KASLR text slide
 * or kernel segment base addresses) to userspace.  Restricted to the
 * superuser and, with MACF, subject to policy checks, since this data
 * defeats kernel ASLR.
 */
int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifndef CONFIG_KAS_INFO
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* CONFIG_KAS_INFO */
	int selector = uap->selector;
	user_addr_t valuep = uap->value;
	user_addr_t sizep = uap->size;
	user_size_t size, rsize;
	int error;

	/* Superuser only: this information defeats KASLR. */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	/* Copy in the caller's buffer size; its width depends on bitness. */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
		rsize = size;
	}
	break;
	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
	{
		uint32_t i;
		kernel_mach_header_t *mh = &_mh_execute_header;
		struct load_command *cmd;
		cmd = (struct load_command*) &mh[1];
		uint64_t *bases;
		rsize = mh->ncmds * sizeof(uint64_t);

		/*
		 * Return the size if no data was passed
		 */
		if (valuep == 0) {
			break;
		}

		if (rsize > size) {
			return EINVAL;
		}

		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);

		/* One slot per load command; non-segment slots stay zero. */
		for (i = 0; i < mh->ncmds; i++) {
			if (cmd->cmd == LC_SEGMENT_KERNEL) {
				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
				bases[i] = (uint64_t)sg->vmaddr;
			}
			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
		}

		error = copyout(bases, valuep, rsize);

		kfree_data(bases, rsize);

		if (error) {
			return error;
		}
	}
	break;
	default:
		return EINVAL;
	}

	/* Report the actual result size back through *sizep. */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64 = (user64_size_t)rsize;
		error = copyout(&size64, sizep, sizeof(size64));
	} else {
		user32_size_t size32 = (user32_size_t)rsize;
		error = copyout(&size32, sizep, sizeof(size32));
	}

	return error;
#endif /* CONFIG_KAS_INFO */
}
3666
3667 #if __has_feature(ptrauth_calls)
3668 /*
3669 * Generate a random pointer signing key that isn't 0.
3670 */
3671 uint64_t
generate_jop_key(void)3672 generate_jop_key(void)
3673 {
3674 uint64_t key;
3675
3676 do {
3677 read_random(&key, sizeof key);
3678 } while (key == 0);
3679 return key;
3680 }
3681 #endif /* __has_feature(ptrauth_calls) */
3682
3683
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#pragma clang diagnostic ignored "-Wunused-function"

/*
 * Compile-time sanity checks: the SYSCTL_ULONG entries below cast
 * vm_{min,max}_kernel_address to (unsigned long *), which is only
 * sound if the underlying objects are exactly unsigned-long-sized.
 */
static void
asserts()
{
	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}

SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

/* Global page count and UPL/IOPL diagnostics. */
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3710
#if __arm64__ && (DEVELOPMENT || DEBUG)
/* When clear, footprint-accounting suspend requests are refused (resumes still work). */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3716 static int
3717 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3718 {
3719 #pragma unused(oidp, arg1, arg2)
3720 int error = 0;
3721 int new_value;
3722
3723 if (req->newptr == USER_ADDR_NULL) {
3724 return 0;
3725 }
3726 error = SYSCTL_IN(req, &new_value, sizeof(int));
3727 if (error) {
3728 return error;
3729 }
3730 if (!vm_footprint_suspend_allowed) {
3731 if (new_value != 0) {
3732 /* suspends are not allowed... */
3733 return 0;
3734 }
3735 /* ... but let resumes proceed */
3736 }
3737 DTRACE_VM2(footprint_suspend,
3738 vm_map_t, current_map(),
3739 int, new_value);
3740
3741 pmap_footprint_suspend(current_map(), new_value);
3742
3743 return 0;
3744 }
/* Write-only, any-user sysctl entry point for footprint suspend/resume. */
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3749
3750 extern uint64_t vm_map_corpse_footprint_count;
3751 extern uint64_t vm_map_corpse_footprint_size_avg;
3752 extern uint64_t vm_map_corpse_footprint_size_max;
3753 extern uint64_t vm_map_corpse_footprint_full;
3754 extern uint64_t vm_map_corpse_footprint_no_buf;
3755 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
3756 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
3757 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
3758 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
3759 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
3760 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
3761 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
3762 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
3763 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
3764 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3765
3766
3767 extern uint64_t shared_region_pager_copied;
3768 extern uint64_t shared_region_pager_slid;
3769 extern uint64_t shared_region_pager_slid_error;
3770 extern uint64_t shared_region_pager_reclaimed;
3771 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
3772 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
3773 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
3774 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
3775 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
3776 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
3777 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
3778 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
3779 extern int shared_region_destroy_delay;
3780 SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
3781 CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3782
3783 #if MACH_ASSERT
3784 extern int pmap_ledgers_panic_leeway;
3785 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
3786 #endif /* MACH_ASSERT */
3787
3788
/*
 * Read-only counters exported from the vm_map_lookup_and_lock_object()
 * copy paths.  For each strategy ("slowly", "strategically", "shadow")
 * the VM layer tracks an invocation count, a cumulative size, and a
 * per-call maximum; the "slowly"/"strategically" paths additionally
 * count restarts and errors.
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3828
/* Writable policy toggle: protect privileged processes from untrusted mappings. */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
/* Counter of copies performed on read — presumably copy-on-read faults; confirm in the VM fault code. */
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Current and peak number of shared regions in the system (read-only). */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
/* Shared-region pager statistics, exposed on DEVELOPMENT/DEBUG kernels only. */
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* ptrauth-only shared-region stats: signing-key count and re-slide count. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */
3861
#if MACH_ASSERT
/*
 * Writable debug knobs for the "debug4k" 4K-page debugging support
 * (MACH_ASSERT kernels only).  The panic_on_* flags promote the named
 * debug4k conditions to panics.
 */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3872
/*
 * Read-only counters for address-space / data-size limit activity in
 * vm_map: how often the limits were set, and how often vm_map_enter
 * hit RLIMIT_AS / RLIMIT_DATA.
 */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/*
 * Read-only counters tracing the states of the "resilient media" fault
 * path (initiate / retry / proceed / release / two abort sites).
 */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/*
 * Error-injection knobs for the resilient-media path (MACH_ASSERT only):
 * each *_rate is a writable injection rate; the matching counter
 * (read-only) reports how many errors were actually injected.
 */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

/* Number of times pmap_query_page_info() had to retry (read-only). */
extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3911
3912 /*
3913 * A sysctl which causes all existing shared regions to become stale. They
3914 * will no longer be used by anything new and will be torn down as soon as
3915 * the last existing user exits. A write of non-zero value causes that to happen.
3916 * This should only be used by launchd, so we check that this is initproc.
3917 */
3918 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3919 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3920 {
3921 unsigned int value = 0;
3922 int changed = 0;
3923 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3924 if (error || !changed) {
3925 return error;
3926 }
3927 if (current_proc() != initproc) {
3928 return EPERM;
3929 }
3930
3931 vm_shared_region_pivot();
3932
3933 return 0;
3934 }
3935
/* vm.shared_region_pivot: write-only trigger; handled by shared_region_pivot(). */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* Counts of vm_object shadow operations that were forced vs. skipped (read-only). */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

/* vmtc_total is declared elsewhere in this file/headers; see description string. */
SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
3949
3950
#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal
 * instruction.  Used for testing text page self healing.
 * vm_corrupt_text_addr() is implemented in the VM layer; the sysctl
 * handler wrapping it is defined below.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3957 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3958 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3959 {
3960 uint64_t value = 0;
3961 int error = sysctl_handle_quad(oidp, &value, 0, req);
3962 if (error || !req->newptr) {
3963 return error;
3964 }
3965
3966 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3967 return 0;
3968 } else {
3969 return EINVAL;
3970 }
3971 }
3972
/* vm.corrupt_text_addr: write-only, masked quad taking the address to corrupt. */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
3977
3978 #if DEBUG || DEVELOPMENT
3979 #if CONFIG_MAP_RANGES
3980 static int
3981 vm_map_user_range_default SYSCTL_HANDLER_ARGS
3982 {
3983 #pragma unused(arg1, arg2, oidp)
3984 struct mach_vm_range range;
3985
3986 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
3987 != KERN_SUCCESS) {
3988 return EINVAL;
3989 }
3990
3991 return SYSCTL_OUT(req, &range, sizeof(range));
3992 }
3993
3994 static int
3995 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
3996 {
3997 #pragma unused(arg1, arg2, oidp)
3998 struct mach_vm_range range;
3999
4000 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4001 != KERN_SUCCESS) {
4002 return EINVAL;
4003 }
4004
4005 return SYSCTL_OUT(req, &range, sizeof(range));
4006 }
4007
4008 /*
4009 * A sysctl that can be used to return ranges for the current VM map.
4010 * Used for testing VM ranges.
4011 */
4012 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4013 0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
4014 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4015 0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
4016
4017 #endif /* CONFIG_MAP_RANGES */
4018 #endif /* DEBUG || DEVELOPMENT */
4019
/*
 * Compressor segment-fill statistics (read-only): fills completed with
 * and without lock contention, plus the worst-case contention wait,
 * split into seconds and nanoseconds parts.
 */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/*
 * Major-compaction instrumentation for the compressor (macOS arm64 only).
 * The report threshold and yield interval are writable tunables; the
 * remaining entries are read-only statistics.
 */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
4042
#if DEVELOPMENT || DEBUG
/* Writable (by anybody) knob: when set, "object not alive" conditions panic. */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* Writable (by anybody) knob suppressing fbdp debug panics — presumably the
 * free-page debug ("fbdp") checks in the VM resident-page code; confirm there. */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4052