1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #include <sys/code_signing.h>
86 #if NECP
87 #include <net/necp.h>
88 #endif /* NECP */
89 #if SKYWALK
90 #include <skywalk/os_channel.h>
91 #endif /* SKYWALK */
92
93 #include <security/audit/audit.h>
94 #include <security/mac.h>
95 #include <bsm/audit_kevents.h>
96
97 #include <kern/kalloc.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_kern.h>
100 #include <vm/vm_pageout.h>
101
102 #include <mach/shared_region.h>
103 #include <vm/vm_shared_region.h>
104
105 #include <vm/vm_dyld_pager.h>
106
107 #include <vm/vm_protos.h>
108
109 #include <sys/kern_memorystatus.h>
110 #include <sys/kern_memorystatus_freeze.h>
111 #include <sys/proc_internal.h>
112
113 #include <mach-o/fixup-chains.h>
114
115 #if CONFIG_MACF
116 #include <security/mac_framework.h>
117 #endif
118
119 #include <kern/bits.h>
120
121 #if CONFIG_CSR
122 #include <sys/csr.h>
123 #endif /* CONFIG_CSR */
124 #include <sys/trust_caches.h>
125 #include <libkern/amfi/amfi.h>
126 #include <IOKit/IOBSD.h>
127
128 #if VM_MAP_DEBUG_APPLE_PROTECT
129 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
130 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
131
132 #if VM_MAP_DEBUG_FOURK
133 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
134 #endif /* VM_MAP_DEBUG_FOURK */
135
136 #if DEVELOPMENT || DEBUG
137
138 static int
139 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
140 {
141 #pragma unused(arg1, arg2)
142 vm_offset_t kaddr;
143 kern_return_t kr;
144 int error = 0;
145 int size = 0;
146
147 error = sysctl_handle_int(oidp, &size, 0, req);
148 if (error || !req->newptr) {
149 return error;
150 }
151
152 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
153 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
154
155 if (kr == KERN_SUCCESS) {
156 kmem_free(kernel_map, kaddr, size);
157 }
158
159 return error;
160 }
161
162 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
163 0, 0, &sysctl_kmem_alloc_contig, "I", "");
164
165 extern int vm_region_footprint;
166 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
167
168 static int
169 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
170 {
171 #pragma unused(arg1, arg2, oidp)
172 kmem_gobj_stats stats = kmem_get_gobj_stats();
173
174 return SYSCTL_OUT(req, &stats, sizeof(stats));
175 }
176
177 SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
178 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
179 0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
180
181 #endif /* DEVELOPMENT || DEBUG */
182
183 static int
184 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
185 {
186 #pragma unused(arg1, arg2, oidp)
187 int error = 0;
188 int value;
189
190 value = task_self_region_footprint();
191 error = SYSCTL_OUT(req, &value, sizeof(int));
192 if (error) {
193 return error;
194 }
195
196 if (!req->newptr) {
197 return 0;
198 }
199
200 error = SYSCTL_IN(req, &value, sizeof(int));
201 if (error) {
202 return error;
203 }
204 task_self_region_footprint_set(value);
205 return 0;
206 }
207 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
208
209 static int
210 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
211 {
212 #pragma unused(arg1, arg2, oidp)
213 int error = 0;
214 int value;
215
216 value = (1 << thread_self_region_page_shift());
217 error = SYSCTL_OUT(req, &value, sizeof(int));
218 if (error) {
219 return error;
220 }
221
222 if (!req->newptr) {
223 return 0;
224 }
225
226 error = SYSCTL_IN(req, &value, sizeof(int));
227 if (error) {
228 return error;
229 }
230
231 if (value != 0 && value != 4096 && value != 16384) {
232 return EINVAL;
233 }
234
235 #if !__ARM_MIXED_PAGE_SIZE__
236 if (value != vm_map_page_size(current_map())) {
237 return EINVAL;
238 }
239 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
240
241 thread_self_region_page_shift_set(bit_first(value));
242 return 0;
243 }
244 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
245
246
247 #if DEVELOPMENT || DEBUG
248 extern int panic_on_unsigned_execute;
249 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
250
251 extern int vm_log_xnu_user_debug;
252 SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
253 #endif /* DEVELOPMENT || DEBUG */
254
255 extern int cs_executable_create_upl;
256 extern int cs_executable_wire;
257 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
258 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
259
260 extern int apple_protect_pager_count;
261 extern int apple_protect_pager_count_mapped;
262 extern unsigned int apple_protect_pager_cache_limit;
263 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
264 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
265 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
266
267 #if DEVELOPMENT || DEBUG
268 extern int radar_20146450;
269 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
270
271 extern int macho_printf;
272 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
273
274 extern int apple_protect_pager_data_request_debug;
275 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
276
277 #if __arm64__
278 /* These are meant to support the page table accounting unit test. */
279 extern unsigned int arm_hardware_page_size;
280 extern unsigned int arm_pt_desc_size;
281 extern unsigned int arm_pt_root_size;
282 extern unsigned int inuse_user_tteroot_count;
283 extern unsigned int inuse_kernel_tteroot_count;
284 extern unsigned int inuse_user_ttepages_count;
285 extern unsigned int inuse_kernel_ttepages_count;
286 extern unsigned int inuse_user_ptepages_count;
287 extern unsigned int inuse_kernel_ptepages_count;
288 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
289 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
290 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
291 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
292 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
293 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
294 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
295 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
296 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
297 extern unsigned int free_page_size_tt_count;
298 extern unsigned int free_two_page_size_tt_count;
299 extern unsigned int free_tt_count;
300 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
301 SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
302 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
303 #if DEVELOPMENT || DEBUG
304 extern unsigned long pmap_asid_flushes;
305 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
306 extern unsigned long pmap_asid_hits;
307 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
308 extern unsigned long pmap_asid_misses;
309 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
310 #endif
311 #endif /* __arm64__ */
312
313 #if __arm64__
314 extern int fourk_pager_data_request_debug;
315 SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
316 #endif /* __arm64__ */
317 #endif /* DEVELOPMENT || DEBUG */
318
319 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
320 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
321 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
322 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
323 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
324 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
325 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
326 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
327 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
328 #if VM_SCAN_FOR_SHADOW_CHAIN
329 static int vm_shadow_max_enabled = 0; /* Disabled by default */
330 extern int proc_shadow_max(void);
331 static int
332 vm_shadow_max SYSCTL_HANDLER_ARGS
333 {
334 #pragma unused(arg1, arg2, oidp)
335 int value = 0;
336
337 if (vm_shadow_max_enabled) {
338 value = proc_shadow_max();
339 }
340
341 return SYSCTL_OUT(req, &value, sizeof(value));
342 }
343 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
344 0, 0, &vm_shadow_max, "I", "");
345
346 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
347
348 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
349
350 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
351
352 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
353 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
354 /*
355 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
356 */
357
358 #if DEVELOPMENT || DEBUG
359 extern int allow_stack_exec, allow_data_exec;
360
361 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
362 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
363
364 #endif /* DEVELOPMENT || DEBUG */
365
/*
 * Human-readable names for VM protection combinations, indexed by
 * (prot & VM_PROT_ALL) — see log_stack_execution_failure() below.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
376
377 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)378 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
379 {
380 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
381 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
382 }
383
384 /*
385 * shared_region_unnest_logging: level of logging of unnesting events
386 * 0 - no logging
387 * 1 - throttled logging of unexpected unnesting events (default)
388 * 2 - unthrottled logging of unexpected unnesting events
389 * 3+ - unthrottled logging of all unnesting events
390 */
391 int shared_region_unnest_logging = 1;
392
393 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
394 &shared_region_unnest_logging, 0, "");
395
396 int vm_shared_region_unnest_log_interval = 10;
397 int shared_region_unnest_log_count_threshold = 5;
398
399
#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
/* Enforce the allowed shared-cache directories by default on Intel macs. */
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0; /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

/* Directories the dyld shared cache may legitimately be mapped from (macOS). */
static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

/* Embedded platforms: enforcement off; different cache locations. */
static int scdir_enforce = 0;
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Allowed locations for the DriverKit variant of the shared cache. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
437
438 #ifndef SECURE_KERNEL
/*
 * vm.enforce_shared_cache_dir: toggling enforcement of the allowed
 * shared-cache directories is gated (when CONFIG_CSR is built in)
 * on SIP's filesystem restrictions being disabled.
 */
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	/* Deny the write unless CSR allows unrestricted filesystem access. */
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}
449
450 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
451 #endif
452
453 /* These log rate throttling state variables aren't thread safe, but
454 * are sufficient unto the task.
455 */
456 static int64_t last_unnest_log_time = 0;
457 static int shared_region_unnest_log_count = 0;
458
459 void
log_unnest_badness(vm_map_t m,vm_map_offset_t s,vm_map_offset_t e,boolean_t is_nested_map,vm_map_offset_t lowest_unnestable_addr)460 log_unnest_badness(
461 vm_map_t m,
462 vm_map_offset_t s,
463 vm_map_offset_t e,
464 boolean_t is_nested_map,
465 vm_map_offset_t lowest_unnestable_addr)
466 {
467 struct timeval tv;
468
469 if (shared_region_unnest_logging == 0) {
470 return;
471 }
472
473 if (shared_region_unnest_logging <= 2 &&
474 is_nested_map &&
475 s >= lowest_unnestable_addr) {
476 /*
477 * Unnesting of writable map entries is fine.
478 */
479 return;
480 }
481
482 if (shared_region_unnest_logging <= 1) {
483 microtime(&tv);
484 if ((tv.tv_sec - last_unnest_log_time) <
485 vm_shared_region_unnest_log_interval) {
486 if (shared_region_unnest_log_count++ >
487 shared_region_unnest_log_count_threshold) {
488 return;
489 }
490 } else {
491 last_unnest_log_time = tv.tv_sec;
492 shared_region_unnest_log_count = 0;
493 }
494 }
495
496 DTRACE_VM4(log_unnest_badness,
497 vm_map_t, m,
498 vm_map_offset_t, s,
499 vm_map_offset_t, e,
500 vm_map_offset_t, lowest_unnestable_addr);
501 printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
502 }
503
504 int
useracc(user_addr_t addr,user_size_t len,int prot)505 useracc(
506 user_addr_t addr,
507 user_size_t len,
508 int prot)
509 {
510 vm_map_t map;
511
512 map = current_map();
513 return vm_map_check_protection(
514 map,
515 vm_map_trunc_page(addr,
516 vm_map_page_mask(map)),
517 vm_map_round_page(addr + len,
518 vm_map_page_mask(map)),
519 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
520 }
521
522 int
vslock(user_addr_t addr,user_size_t len)523 vslock(
524 user_addr_t addr,
525 user_size_t len)
526 {
527 kern_return_t kret;
528 vm_map_t map;
529
530 map = current_map();
531 kret = vm_map_wire_kernel(map,
532 vm_map_trunc_page(addr,
533 vm_map_page_mask(map)),
534 vm_map_round_page(addr + len,
535 vm_map_page_mask(map)),
536 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
537 FALSE);
538
539 switch (kret) {
540 case KERN_SUCCESS:
541 return 0;
542 case KERN_INVALID_ADDRESS:
543 case KERN_NO_SPACE:
544 return ENOMEM;
545 case KERN_PROTECTION_FAILURE:
546 return EACCES;
547 default:
548 return EINVAL;
549 }
550 }
551
552 int
vsunlock(user_addr_t addr,user_size_t len,__unused int dirtied)553 vsunlock(
554 user_addr_t addr,
555 user_size_t len,
556 __unused int dirtied)
557 {
558 #if FIXME /* [ */
559 pmap_t pmap;
560 vm_page_t pg;
561 vm_map_offset_t vaddr;
562 ppnum_t paddr;
563 #endif /* FIXME ] */
564 kern_return_t kret;
565 vm_map_t map;
566
567 map = current_map();
568
569 #if FIXME /* [ */
570 if (dirtied) {
571 pmap = get_task_pmap(current_task());
572 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
573 vaddr < vm_map_round_page(addr + len, PAGE_MASK);
574 vaddr += PAGE_SIZE) {
575 paddr = pmap_find_phys(pmap, vaddr);
576 pg = PHYS_TO_VM_PAGE(paddr);
577 vm_page_set_modified(pg);
578 }
579 }
580 #endif /* FIXME ] */
581 #ifdef lint
582 dirtied++;
583 #endif /* lint */
584 kret = vm_map_unwire(map,
585 vm_map_trunc_page(addr,
586 vm_map_page_mask(map)),
587 vm_map_round_page(addr + len,
588 vm_map_page_mask(map)),
589 FALSE);
590 switch (kret) {
591 case KERN_SUCCESS:
592 return 0;
593 case KERN_INVALID_ADDRESS:
594 case KERN_NO_SPACE:
595 return ENOMEM;
596 case KERN_PROTECTION_FAILURE:
597 return EACCES;
598 default:
599 return EINVAL;
600 }
601 }
602
603 int
subyte(user_addr_t addr,int byte)604 subyte(
605 user_addr_t addr,
606 int byte)
607 {
608 char character;
609
610 character = (char)byte;
611 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
612 }
613
614 int
suibyte(user_addr_t addr,int byte)615 suibyte(
616 user_addr_t addr,
617 int byte)
618 {
619 char character;
620
621 character = (char)byte;
622 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
623 }
624
625 int
fubyte(user_addr_t addr)626 fubyte(user_addr_t addr)
627 {
628 unsigned char byte;
629
630 if (copyin(addr, (void *) &byte, sizeof(char))) {
631 return -1;
632 }
633 return byte;
634 }
635
636 int
fuibyte(user_addr_t addr)637 fuibyte(user_addr_t addr)
638 {
639 unsigned char byte;
640
641 if (copyin(addr, (void *) &(byte), sizeof(char))) {
642 return -1;
643 }
644 return byte;
645 }
646
647 int
suword(user_addr_t addr,long word)648 suword(
649 user_addr_t addr,
650 long word)
651 {
652 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
653 }
654
655 long
fuword(user_addr_t addr)656 fuword(user_addr_t addr)
657 {
658 long word = 0;
659
660 if (copyin(addr, (void *) &word, sizeof(int))) {
661 return -1;
662 }
663 return word;
664 }
665
666 /* suiword and fuiword are the same as suword and fuword, respectively */
667
668 int
suiword(user_addr_t addr,long word)669 suiword(
670 user_addr_t addr,
671 long word)
672 {
673 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
674 }
675
676 long
fuiword(user_addr_t addr)677 fuiword(user_addr_t addr)
678 {
679 long word = 0;
680
681 if (copyin(addr, (void *) &word, sizeof(int))) {
682 return -1;
683 }
684 return word;
685 }
686
687 /*
688 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
689 * fetching and setting of process-sized size_t and pointer values.
690 */
691 int
sulong(user_addr_t addr,int64_t word)692 sulong(user_addr_t addr, int64_t word)
693 {
694 if (IS_64BIT_PROCESS(current_proc())) {
695 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
696 } else {
697 return suiword(addr, (long)word);
698 }
699 }
700
701 int64_t
fulong(user_addr_t addr)702 fulong(user_addr_t addr)
703 {
704 int64_t longword;
705
706 if (IS_64BIT_PROCESS(current_proc())) {
707 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
708 return -1;
709 }
710 return longword;
711 } else {
712 return (int64_t)fuiword(addr);
713 }
714 }
715
716 int
suulong(user_addr_t addr,uint64_t uword)717 suulong(user_addr_t addr, uint64_t uword)
718 {
719 if (IS_64BIT_PROCESS(current_proc())) {
720 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
721 } else {
722 return suiword(addr, (uint32_t)uword);
723 }
724 }
725
726 uint64_t
fuulong(user_addr_t addr)727 fuulong(user_addr_t addr)
728 {
729 uint64_t ulongword;
730
731 if (IS_64BIT_PROCESS(current_proc())) {
732 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
733 return -1ULL;
734 }
735 return ulongword;
736 } else {
737 return (uint64_t)fuiword(addr);
738 }
739 }
740
/*
 * swapon: manual swap-device configuration is not supported;
 * always fails with ENOTSUP.
 */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}
746
747 /*
748 * pid_for_task
749 *
750 * Find the BSD process ID for the Mach task associated with the given Mach port
751 * name
752 *
753 * Parameters: args User argument descriptor (see below)
754 *
755 * Indirect parameters: args->t Mach port name
756 * args->pid Process ID (returned value; see below)
757 *
758 * Returns: KERL_SUCCESS Success
759 * KERN_FAILURE Not success
760 *
761 * Implicit returns: args->pid Process ID
762 *
763 */
764 kern_return_t
pid_for_task(struct pid_for_task_args * args)765 pid_for_task(
766 struct pid_for_task_args *args)
767 {
768 mach_port_name_t t = args->t;
769 user_addr_t pid_addr = args->pid;
770 proc_t p;
771 task_t t1;
772 int pid = -1;
773 kern_return_t err = KERN_SUCCESS;
774
775 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
776 AUDIT_ARG(mach_port1, t);
777
778 t1 = port_name_to_task_name(t);
779
780 if (t1 == TASK_NULL) {
781 err = KERN_FAILURE;
782 goto pftout;
783 } else {
784 p = get_bsdtask_info(t1);
785 if (p) {
786 pid = proc_pid(p);
787 err = KERN_SUCCESS;
788 } else if (task_is_a_corpse(t1)) {
789 pid = task_pid(t1);
790 err = KERN_SUCCESS;
791 } else {
792 err = KERN_FAILURE;
793 }
794 }
795 task_deallocate(t1);
796 pftout:
797 AUDIT_ARG(pid, pid);
798 (void) copyout((char *) &pid, pid_addr, sizeof(int));
799 AUDIT_MACH_SYSCALL_EXIT(err);
800 return err;
801 }
802
803 /*
804 *
805 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
806 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
807 *
808 */
809 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
810
811 /*
812 * Routine: task_for_pid_posix_check
813 * Purpose:
814 * Verify that the current process should be allowed to
815 * get the target process's task port. This is only
816 * permitted if:
817 * - The current process is root
818 * OR all of the following are true:
819 * - The target process's real, effective, and saved uids
820 * are the same as the current proc's euid,
821 * - The target process's group set is a subset of the
822 * calling process's group set, and
823 * - The target process hasn't switched credentials.
824 *
825 * Returns: TRUE: permitted
826 * FALSE: denied
827 */
828 static int
task_for_pid_posix_check(proc_t target)829 task_for_pid_posix_check(proc_t target)
830 {
831 kauth_cred_t targetcred, mycred;
832 bool checkcredentials;
833 uid_t myuid;
834 int allowed;
835
836 /* No task_for_pid on bad targets */
837 if (target->p_stat == SZOMB) {
838 return FALSE;
839 }
840
841 mycred = kauth_cred_get();
842 myuid = kauth_cred_getuid(mycred);
843
844 /* If we're running as root, the check passes */
845 if (kauth_cred_issuser(mycred)) {
846 return TRUE;
847 }
848
849 /* We're allowed to get our own task port */
850 if (target == current_proc()) {
851 return TRUE;
852 }
853
854 /*
855 * Under DENY, only root can get another proc's task port,
856 * so no more checks are needed.
857 */
858 if (tfp_policy == KERN_TFP_POLICY_DENY) {
859 return FALSE;
860 }
861
862 targetcred = kauth_cred_proc_ref(target);
863 allowed = TRUE;
864
865 checkcredentials = !proc_is_third_party_debuggable_driver(target);
866
867 if (checkcredentials) {
868 /* Do target's ruid, euid, and saved uid match my euid? */
869 if ((kauth_cred_getuid(targetcred) != myuid) ||
870 (kauth_cred_getruid(targetcred) != myuid) ||
871 (kauth_cred_getsvuid(targetcred) != myuid)) {
872 allowed = FALSE;
873 goto out;
874 }
875 /* Are target's groups a subset of my groups? */
876 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
877 allowed == 0) {
878 allowed = FALSE;
879 goto out;
880 }
881 }
882
883 /* Has target switched credentials? */
884 if (target->p_flag & P_SUGID) {
885 allowed = FALSE;
886 goto out;
887 }
888
889 out:
890 kauth_cred_unref(&targetcred);
891 return allowed;
892 }
893
894 /*
895 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
896 *
897 * Description: Waits for the user space daemon to respond to the request
898 * we made. Function declared non inline to be visible in
899 * stackshots and spindumps as well as debugging.
900 */
901 __attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(mach_port_t task_access_port,int32_t calling_pid,uint32_t calling_gid,int32_t target_pid,mach_task_flavor_t flavor)902 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
903 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
904 {
905 return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
906 }
907
908 /*
909 * Routine: task_for_pid
910 * Purpose:
911 * Get the task port for another "process", named by its
912 * process ID on the same host as "target_task".
913 *
914 * Only permitted to privileged processes, or processes
915 * with the same user ID.
916 *
917 * Note: if pid == 0, an error is return no matter who is calling.
918 *
919 * XXX This should be a BSD system call, not a Mach trap!!!
920 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;       /* caller's task, from its port */
	task_t                  task = TASK_NULL;     /* target task */
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	void                    *sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Validate the caller's task port before doing any other work. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture a stable identity now; used for the MACF check after proc_rele(). */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* POSIX-level permission check (root, self, or matching credentials). */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* Target has no task (e.g. mid-exit): report success with a null port. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held across upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	/* MAC policy check for control-port access, using the saved identity. */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (task_is_a_corpse(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* Move the send right into the caller's IPC space. */
	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Always report the resulting port name (MACH_PORT_NULL on failure). */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1068
1069 /*
1070 * Routine: task_name_for_pid
1071 * Purpose:
1072 * Get the task name port for another "process", named by its
1073 * process ID on the same host as "target_task".
1074 *
1075 * Only permitted to privileged processes, or processes
1076 * with the same user ID.
1077 *
1078 * XXX This should be a BSD system call, not a Mach trap!!!
1079 */
1080
1081 kern_return_t
task_name_for_pid(struct task_name_for_pid_args * args)1082 task_name_for_pid(
1083 struct task_name_for_pid_args *args)
1084 {
1085 mach_port_name_t target_tport = args->target_tport;
1086 int pid = args->pid;
1087 user_addr_t task_addr = args->t;
1088 proc_t p = PROC_NULL;
1089 task_t t1 = TASK_NULL;
1090 mach_port_name_t tret = MACH_PORT_NULL;
1091 void * sright;
1092 int error = 0, refheld = 0;
1093 kauth_cred_t target_cred;
1094
1095 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
1096 AUDIT_ARG(pid, pid);
1097 AUDIT_ARG(mach_port1, target_tport);
1098
1099 t1 = port_name_to_task(target_tport);
1100 if (t1 == TASK_NULL) {
1101 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1102 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1103 return KERN_FAILURE;
1104 }
1105
1106 p = proc_find(pid);
1107 if (p != PROC_NULL) {
1108 AUDIT_ARG(process, p);
1109 target_cred = kauth_cred_proc_ref(p);
1110 refheld = 1;
1111
1112 if ((p->p_stat != SZOMB)
1113 && ((current_proc() == p)
1114 || kauth_cred_issuser(kauth_cred_get())
1115 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
1116 ((kauth_cred_getruid(target_cred) == kauth_getruid())))
1117 || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
1118 )) {
1119 if (proc_task(p) != TASK_NULL) {
1120 struct proc_ident pident = proc_ident(p);
1121
1122 task_t task = proc_task(p);
1123
1124 task_reference(task);
1125 proc_rele(p);
1126 p = PROC_NULL;
1127 #if CONFIG_MACF
1128 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
1129 if (error) {
1130 task_deallocate(task);
1131 goto noperm;
1132 }
1133 #endif
1134 sright = (void *)convert_task_name_to_port(task);
1135 task = NULL;
1136 tret = ipc_port_copyout_send(sright,
1137 get_task_ipcspace(current_task()));
1138 } else {
1139 tret = MACH_PORT_NULL;
1140 }
1141
1142 AUDIT_ARG(mach_port2, tret);
1143 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1144 task_deallocate(t1);
1145 error = KERN_SUCCESS;
1146 goto tnfpout;
1147 }
1148 }
1149
1150 #if CONFIG_MACF
1151 noperm:
1152 #endif
1153 task_deallocate(t1);
1154 tret = MACH_PORT_NULL;
1155 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1156 error = KERN_FAILURE;
1157 tnfpout:
1158 if (refheld != 0) {
1159 kauth_cred_unref(&target_cred);
1160 }
1161 if (p != PROC_NULL) {
1162 proc_rele(p);
1163 }
1164 AUDIT_MACH_SYSCALL_EXIT(error);
1165 return error;
1166 }
1167
1168 /*
1169 * Routine: task_inspect_for_pid
1170 * Purpose:
1171 * Get the task inspect port for another "process", named by its
1172 * process ID on the same host as "target_task".
1173 */
1174 int
task_inspect_for_pid(struct proc * p __unused,struct task_inspect_for_pid_args * args,int * ret)1175 task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
1176 {
1177 mach_port_name_t target_tport = args->target_tport;
1178 int pid = args->pid;
1179 user_addr_t task_addr = args->t;
1180
1181 proc_t proc = PROC_NULL;
1182 task_t t1 = TASK_NULL;
1183 task_inspect_t task_insp = TASK_INSPECT_NULL;
1184 mach_port_name_t tret = MACH_PORT_NULL;
1185 ipc_port_t tfpport = MACH_PORT_NULL;
1186 int error = 0;
1187 void *sright = NULL;
1188 boolean_t is_current_proc = FALSE;
1189 struct proc_ident pident = {0};
1190
1191 /* Disallow inspect port for kernel_task */
1192 if (pid == 0) {
1193 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1194 return EPERM;
1195 }
1196
1197 t1 = port_name_to_task(target_tport);
1198 if (t1 == TASK_NULL) {
1199 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1200 return EINVAL;
1201 }
1202
1203 proc = proc_find(pid);
1204 if (proc == PROC_NULL) {
1205 error = ESRCH;
1206 goto tifpout;
1207 }
1208 pident = proc_ident(proc);
1209 is_current_proc = (proc == current_proc());
1210
1211 if (!(task_for_pid_posix_check(proc))) {
1212 error = EPERM;
1213 goto tifpout;
1214 }
1215
1216 task_insp = proc_task(proc);
1217 if (task_insp == TASK_INSPECT_NULL) {
1218 goto tifpout;
1219 }
1220
1221 /*
1222 * Grab a task reference and drop the proc reference before making any upcalls.
1223 */
1224 task_reference(task_insp);
1225
1226 proc_rele(proc);
1227 proc = PROC_NULL;
1228
1229 #if CONFIG_MACF
1230 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
1231 if (error) {
1232 error = EPERM;
1233 goto tifpout;
1234 }
1235 #endif
1236
1237 /* If we aren't root and target's task access port is set... */
1238 if (!kauth_cred_issuser(kauth_cred_get()) &&
1239 !is_current_proc &&
1240 (task_get_task_access_port(task_insp, &tfpport) == 0) &&
1241 (tfpport != IPC_PORT_NULL)) {
1242 if (tfpport == IPC_PORT_DEAD) {
1243 error = EACCES;
1244 goto tifpout;
1245 }
1246
1247
1248 /* Call up to the task access server */
1249 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1250 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
1251
1252 if (error != MACH_MSG_SUCCESS) {
1253 if (error == MACH_RCV_INTERRUPTED) {
1254 error = EINTR;
1255 } else {
1256 error = EPERM;
1257 }
1258 goto tifpout;
1259 }
1260 }
1261
1262 /* Check if the task has been corpsified */
1263 if (task_is_a_corpse(task_insp)) {
1264 error = EACCES;
1265 goto tifpout;
1266 }
1267
1268 /* could be IP_NULL, consumes a ref */
1269 sright = (void*) convert_task_inspect_to_port(task_insp);
1270 task_insp = TASK_INSPECT_NULL;
1271 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1272
1273 tifpout:
1274 task_deallocate(t1);
1275 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1276 if (proc != PROC_NULL) {
1277 proc_rele(proc);
1278 }
1279 if (tfpport != IPC_PORT_NULL) {
1280 ipc_port_release_send(tfpport);
1281 }
1282 if (task_insp != TASK_INSPECT_NULL) {
1283 task_deallocate(task_insp);
1284 }
1285
1286 *ret = error;
1287 return error;
1288 }
1289
1290 /*
1291 * Routine: task_read_for_pid
1292 * Purpose:
1293 * Get the task read port for another "process", named by its
1294 * process ID on the same host as "target_task".
1295 */
1296 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1297 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1298 {
1299 mach_port_name_t target_tport = args->target_tport;
1300 int pid = args->pid;
1301 user_addr_t task_addr = args->t;
1302
1303 proc_t proc = PROC_NULL;
1304 task_t t1 = TASK_NULL;
1305 task_read_t task_read = TASK_READ_NULL;
1306 mach_port_name_t tret = MACH_PORT_NULL;
1307 ipc_port_t tfpport = MACH_PORT_NULL;
1308 int error = 0;
1309 void *sright = NULL;
1310 boolean_t is_current_proc = FALSE;
1311 struct proc_ident pident = {0};
1312
1313 /* Disallow read port for kernel_task */
1314 if (pid == 0) {
1315 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1316 return EPERM;
1317 }
1318
1319 t1 = port_name_to_task(target_tport);
1320 if (t1 == TASK_NULL) {
1321 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1322 return EINVAL;
1323 }
1324
1325 proc = proc_find(pid);
1326 if (proc == PROC_NULL) {
1327 error = ESRCH;
1328 goto trfpout;
1329 }
1330 pident = proc_ident(proc);
1331 is_current_proc = (proc == current_proc());
1332
1333 if (!(task_for_pid_posix_check(proc))) {
1334 error = EPERM;
1335 goto trfpout;
1336 }
1337
1338 task_read = proc_task(proc);
1339 if (task_read == TASK_INSPECT_NULL) {
1340 goto trfpout;
1341 }
1342
1343 /*
1344 * Grab a task reference and drop the proc reference before making any upcalls.
1345 */
1346 task_reference(task_read);
1347
1348 proc_rele(proc);
1349 proc = PROC_NULL;
1350
1351 #if CONFIG_MACF
1352 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1353 if (error) {
1354 error = EPERM;
1355 goto trfpout;
1356 }
1357 #endif
1358
1359 /* If we aren't root and target's task access port is set... */
1360 if (!kauth_cred_issuser(kauth_cred_get()) &&
1361 !is_current_proc &&
1362 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1363 (tfpport != IPC_PORT_NULL)) {
1364 if (tfpport == IPC_PORT_DEAD) {
1365 error = EACCES;
1366 goto trfpout;
1367 }
1368
1369
1370 /* Call up to the task access server */
1371 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1372 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1373
1374 if (error != MACH_MSG_SUCCESS) {
1375 if (error == MACH_RCV_INTERRUPTED) {
1376 error = EINTR;
1377 } else {
1378 error = EPERM;
1379 }
1380 goto trfpout;
1381 }
1382 }
1383
1384 /* Check if the task has been corpsified */
1385 if (task_is_a_corpse(task_read)) {
1386 error = EACCES;
1387 goto trfpout;
1388 }
1389
1390 /* could be IP_NULL, consumes a ref */
1391 sright = (void*) convert_task_read_to_port(task_read);
1392 task_read = TASK_READ_NULL;
1393 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1394
1395 trfpout:
1396 task_deallocate(t1);
1397 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1398 if (proc != PROC_NULL) {
1399 proc_rele(proc);
1400 }
1401 if (tfpport != IPC_PORT_NULL) {
1402 ipc_port_release_send(tfpport);
1403 }
1404 if (task_read != TASK_READ_NULL) {
1405 task_deallocate(task_read);
1406 }
1407
1408 *ret = error;
1409 return error;
1410 }
1411
1412 kern_return_t
pid_suspend(struct proc * p __unused,struct pid_suspend_args * args,int * ret)1413 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1414 {
1415 task_t target = NULL;
1416 proc_t targetproc = PROC_NULL;
1417 int pid = args->pid;
1418 int error = 0;
1419 mach_port_t tfpport = MACH_PORT_NULL;
1420
1421 if (pid == 0) {
1422 error = EPERM;
1423 goto out;
1424 }
1425
1426 targetproc = proc_find(pid);
1427 if (targetproc == PROC_NULL) {
1428 error = ESRCH;
1429 goto out;
1430 }
1431
1432 if (!task_for_pid_posix_check(targetproc) &&
1433 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1434 error = EPERM;
1435 goto out;
1436 }
1437
1438 #if CONFIG_MACF
1439 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
1440 if (error) {
1441 error = EPERM;
1442 goto out;
1443 }
1444 #endif
1445
1446 target = proc_task(targetproc);
1447 #if XNU_TARGET_OS_OSX
1448 if (target != TASK_NULL) {
1449 /* If we aren't root and target's task access port is set... */
1450 if (!kauth_cred_issuser(kauth_cred_get()) &&
1451 targetproc != current_proc() &&
1452 (task_get_task_access_port(target, &tfpport) == 0) &&
1453 (tfpport != IPC_PORT_NULL)) {
1454 if (tfpport == IPC_PORT_DEAD) {
1455 error = EACCES;
1456 goto out;
1457 }
1458
1459 /* Call up to the task access server */
1460 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1461 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1462
1463 if (error != MACH_MSG_SUCCESS) {
1464 if (error == MACH_RCV_INTERRUPTED) {
1465 error = EINTR;
1466 } else {
1467 error = EPERM;
1468 }
1469 goto out;
1470 }
1471 }
1472 }
1473 #endif /* XNU_TARGET_OS_OSX */
1474
1475 task_reference(target);
1476 error = task_pidsuspend(target);
1477 if (error) {
1478 if (error == KERN_INVALID_ARGUMENT) {
1479 error = EINVAL;
1480 } else {
1481 error = EPERM;
1482 }
1483 }
1484 #if CONFIG_MEMORYSTATUS
1485 else {
1486 memorystatus_on_suspend(targetproc);
1487 }
1488 #endif
1489
1490 task_deallocate(target);
1491
1492 out:
1493 if (tfpport != IPC_PORT_NULL) {
1494 ipc_port_release_send(tfpport);
1495 }
1496
1497 if (targetproc != PROC_NULL) {
1498 proc_rele(targetproc);
1499 }
1500 *ret = error;
1501 return error;
1502 }
1503
1504 kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args * args)1505 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1506 {
1507 mach_port_name_t target_tport = args->target_tport;
1508 int pid = args->pid;
1509 user_addr_t task_addr = args->t;
1510 proc_t p = PROC_NULL;
1511 task_t t1 = TASK_NULL;
1512 task_t task = TASK_NULL;
1513 mach_port_name_t tret = MACH_PORT_NULL;
1514 ipc_port_t tfpport = MACH_PORT_NULL;
1515 ipc_port_t sright = NULL;
1516 int error = 0;
1517 boolean_t is_current_proc = FALSE;
1518 struct proc_ident pident = {0};
1519
1520 AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1521 AUDIT_ARG(pid, pid);
1522 AUDIT_ARG(mach_port1, target_tport);
1523
1524 /* Always check if pid == 0 */
1525 if (pid == 0) {
1526 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1527 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1528 return KERN_FAILURE;
1529 }
1530
1531 t1 = port_name_to_task(target_tport);
1532 if (t1 == TASK_NULL) {
1533 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1534 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1535 return KERN_FAILURE;
1536 }
1537
1538 p = proc_find(pid);
1539 if (p == PROC_NULL) {
1540 error = KERN_FAILURE;
1541 goto tfpout;
1542 }
1543 pident = proc_ident(p);
1544 is_current_proc = (p == current_proc());
1545
1546 #if CONFIG_AUDIT
1547 AUDIT_ARG(process, p);
1548 #endif
1549
1550 if (!(task_for_pid_posix_check(p))) {
1551 error = KERN_FAILURE;
1552 goto tfpout;
1553 }
1554
1555 if (proc_task(p) == TASK_NULL) {
1556 error = KERN_SUCCESS;
1557 goto tfpout;
1558 }
1559
1560 /*
1561 * Grab a task reference and drop the proc reference before making any upcalls.
1562 */
1563 task = proc_task(p);
1564 task_reference(task);
1565
1566 proc_rele(p);
1567 p = PROC_NULL;
1568
1569 if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
1570 #if CONFIG_MACF
1571 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1572 if (error) {
1573 error = KERN_FAILURE;
1574 goto tfpout;
1575 }
1576 #endif
1577
1578 /* If we aren't root and target's task access port is set... */
1579 if (!kauth_cred_issuser(kauth_cred_get()) &&
1580 !is_current_proc &&
1581 (task_get_task_access_port(task, &tfpport) == 0) &&
1582 (tfpport != IPC_PORT_NULL)) {
1583 if (tfpport == IPC_PORT_DEAD) {
1584 error = KERN_PROTECTION_FAILURE;
1585 goto tfpout;
1586 }
1587
1588
1589 /* Call up to the task access server */
1590 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1591 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1592
1593 if (error != MACH_MSG_SUCCESS) {
1594 if (error == MACH_RCV_INTERRUPTED) {
1595 error = KERN_ABORTED;
1596 } else {
1597 error = KERN_FAILURE;
1598 }
1599 goto tfpout;
1600 }
1601 }
1602 }
1603
1604 /* Check if the task has been corpsified */
1605 if (task_is_a_corpse(task)) {
1606 error = KERN_FAILURE;
1607 goto tfpout;
1608 }
1609
1610 error = task_get_debug_control_port(task, &sright);
1611 if (error != KERN_SUCCESS) {
1612 goto tfpout;
1613 }
1614
1615 tret = ipc_port_copyout_send(
1616 sright,
1617 get_task_ipcspace(current_task()));
1618
1619 error = KERN_SUCCESS;
1620
1621 tfpout:
1622 task_deallocate(t1);
1623 AUDIT_ARG(mach_port2, tret);
1624 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1625
1626 if (tfpport != IPC_PORT_NULL) {
1627 ipc_port_release_send(tfpport);
1628 }
1629 if (task != TASK_NULL) {
1630 task_deallocate(task);
1631 }
1632 if (p != PROC_NULL) {
1633 proc_rele(p);
1634 }
1635 AUDIT_MACH_SYSCALL_EXIT(error);
1636 return error;
1637 }
1638
1639 kern_return_t
pid_resume(struct proc * p __unused,struct pid_resume_args * args,int * ret)1640 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1641 {
1642 task_t target = NULL;
1643 proc_t targetproc = PROC_NULL;
1644 int pid = args->pid;
1645 int error = 0;
1646 mach_port_t tfpport = MACH_PORT_NULL;
1647
1648 if (pid == 0) {
1649 error = EPERM;
1650 goto out;
1651 }
1652
1653 targetproc = proc_find(pid);
1654 if (targetproc == PROC_NULL) {
1655 error = ESRCH;
1656 goto out;
1657 }
1658
1659 if (!task_for_pid_posix_check(targetproc) &&
1660 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1661 error = EPERM;
1662 goto out;
1663 }
1664
1665 #if CONFIG_MACF
1666 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
1667 if (error) {
1668 error = EPERM;
1669 goto out;
1670 }
1671 #endif
1672
1673 target = proc_task(targetproc);
1674 #if XNU_TARGET_OS_OSX
1675 if (target != TASK_NULL) {
1676 /* If we aren't root and target's task access port is set... */
1677 if (!kauth_cred_issuser(kauth_cred_get()) &&
1678 targetproc != current_proc() &&
1679 (task_get_task_access_port(target, &tfpport) == 0) &&
1680 (tfpport != IPC_PORT_NULL)) {
1681 if (tfpport == IPC_PORT_DEAD) {
1682 error = EACCES;
1683 goto out;
1684 }
1685
1686 /* Call up to the task access server */
1687 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1688 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1689
1690 if (error != MACH_MSG_SUCCESS) {
1691 if (error == MACH_RCV_INTERRUPTED) {
1692 error = EINTR;
1693 } else {
1694 error = EPERM;
1695 }
1696 goto out;
1697 }
1698 }
1699 }
1700 #endif /* XNU_TARGET_OS_OSX */
1701
1702 #if !XNU_TARGET_OS_OSX
1703 #if SOCKETS
1704 resume_proc_sockets(targetproc);
1705 #endif /* SOCKETS */
1706 #endif /* !XNU_TARGET_OS_OSX */
1707
1708 task_reference(target);
1709
1710 #if CONFIG_MEMORYSTATUS
1711 memorystatus_on_resume(targetproc);
1712 #endif
1713
1714 error = task_pidresume(target);
1715 if (error) {
1716 if (error == KERN_INVALID_ARGUMENT) {
1717 error = EINVAL;
1718 } else {
1719 if (error == KERN_MEMORY_ERROR) {
1720 psignal(targetproc, SIGKILL);
1721 error = EIO;
1722 } else {
1723 error = EPERM;
1724 }
1725 }
1726 }
1727
1728 task_deallocate(target);
1729
1730 out:
1731 if (tfpport != IPC_PORT_NULL) {
1732 ipc_port_release_send(tfpport);
1733 }
1734
1735 if (targetproc != PROC_NULL) {
1736 proc_rele(targetproc);
1737 }
1738
1739 *ret = error;
1740 return error;
1741 }
1742
1743 #if !XNU_TARGET_OS_OSX
1744 /*
1745 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1746 * When a process is specified, this call is blocking, otherwise we wake up the
1747 * freezer thread and do not block on a process being frozen.
1748 */
1749 kern_return_t
pid_hibernate(struct proc * p __unused,struct pid_hibernate_args * args,int * ret)1750 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1751 {
1752 int error = 0;
1753 proc_t targetproc = PROC_NULL;
1754 int pid = args->pid;
1755
1756 #ifndef CONFIG_FREEZE
1757 #pragma unused(pid)
1758 #else
1759
1760 /*
1761 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1762 */
1763
1764 if (pid >= 0) {
1765 targetproc = proc_find(pid);
1766
1767 if (targetproc == PROC_NULL) {
1768 error = ESRCH;
1769 goto out;
1770 }
1771
1772 if (!task_for_pid_posix_check(targetproc)) {
1773 error = EPERM;
1774 goto out;
1775 }
1776 }
1777
1778 #if CONFIG_MACF
1779 //Note that targetproc may be null
1780 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
1781 if (error) {
1782 error = EPERM;
1783 goto out;
1784 }
1785 #endif
1786
1787 if (pid == -2) {
1788 vm_pageout_anonymous_pages();
1789 } else if (pid == -1) {
1790 memorystatus_on_inactivity(targetproc);
1791 } else {
1792 error = memorystatus_freeze_process_sync(targetproc);
1793 }
1794
1795 out:
1796
1797 #endif /* CONFIG_FREEZE */
1798
1799 if (targetproc != PROC_NULL) {
1800 proc_rele(targetproc);
1801 }
1802 *ret = error;
1803 return error;
1804 }
1805 #endif /* !XNU_TARGET_OS_OSX */
1806
1807 #if SOCKETS
1808 int
networking_memstatus_callout(proc_t p,uint32_t status)1809 networking_memstatus_callout(proc_t p, uint32_t status)
1810 {
1811 struct fileproc *fp;
1812
1813 /*
1814 * proc list lock NOT held
1815 * proc lock NOT held
1816 * a reference on the proc has been held / shall be dropped by the caller.
1817 */
1818 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1819 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1820
1821 proc_fdlock(p);
1822
1823 fdt_foreach(fp, p) {
1824 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1825 #if NECP
1826 case DTYPE_NETPOLICY:
1827 necp_fd_memstatus(p, status,
1828 (struct necp_fd_data *)fp_get_data(fp));
1829 break;
1830 #endif /* NECP */
1831 #if SKYWALK
1832 case DTYPE_CHANNEL:
1833 kern_channel_memstatus(p, status,
1834 (struct kern_channel *)fp_get_data(fp));
1835 break;
1836 #endif /* SKYWALK */
1837 default:
1838 break;
1839 }
1840 }
1841 proc_fdunlock(p);
1842
1843 return 1;
1844 }
1845
1846 #if SKYWALK
1847 /*
1848 * Since we make multiple passes across the fileproc array, record the
1849 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases. If we find more,
1851 * we'll go to the slow path during second pass.
1852 */
1853 #define MAX_CHANNELS 8 /* should be more than enough */
1854 #endif /* SKYWALK */
1855
1856 static int
networking_defunct_callout(proc_t p,void * arg)1857 networking_defunct_callout(proc_t p, void *arg)
1858 {
1859 struct pid_shutdown_sockets_args *args = arg;
1860 int pid = args->pid;
1861 int level = args->level;
1862 struct fileproc *fp;
1863 #if SKYWALK
1864 int i;
1865 int channel_count = 0;
1866 struct kern_channel *channel_array[MAX_CHANNELS];
1867
1868 bzero(&channel_array, sizeof(channel_array));
1869 #endif /* SKYWALK */
1870
1871 proc_fdlock(p);
1872
1873 fdt_foreach(fp, p) {
1874 struct fileglob *fg = fp->fp_glob;
1875
1876 switch (FILEGLOB_DTYPE(fg)) {
1877 case DTYPE_SOCKET: {
1878 struct socket *so = (struct socket *)fg_get_data(fg);
1879 if (proc_getpid(p) == pid || so->last_pid == pid ||
1880 ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
1881 /* Call networking stack with socket and level */
1882 (void)socket_defunct(p, so, level);
1883 }
1884 break;
1885 }
1886 #if NECP
1887 case DTYPE_NETPOLICY:
1888 /* first pass: defunct necp and get stats for ntstat */
1889 if (proc_getpid(p) == pid) {
1890 necp_fd_defunct(p,
1891 (struct necp_fd_data *)fg_get_data(fg));
1892 }
1893 break;
1894 #endif /* NECP */
1895 #if SKYWALK
1896 case DTYPE_CHANNEL:
1897 /* first pass: get channels and total count */
1898 if (proc_getpid(p) == pid) {
1899 if (channel_count < MAX_CHANNELS) {
1900 channel_array[channel_count] =
1901 (struct kern_channel *)fg_get_data(fg);
1902 }
1903 ++channel_count;
1904 }
1905 break;
1906 #endif /* SKYWALK */
1907 default:
1908 break;
1909 }
1910 }
1911
1912 #if SKYWALK
1913 /*
1914 * Second pass: defunct channels/flows (after NECP). Handle
1915 * the common case of up to MAX_CHANNELS count with fast path,
1916 * and traverse the fileproc array again only if we exceed it.
1917 */
1918 if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
1919 ASSERT(proc_getpid(p) == pid);
1920 for (i = 0; i < channel_count; i++) {
1921 ASSERT(channel_array[i] != NULL);
1922 kern_channel_defunct(p, channel_array[i]);
1923 }
1924 } else if (channel_count != 0) {
1925 ASSERT(proc_getpid(p) == pid);
1926 fdt_foreach(fp, p) {
1927 struct fileglob *fg = fp->fp_glob;
1928
1929 if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
1930 kern_channel_defunct(p,
1931 (struct kern_channel *)fg_get_data(fg));
1932 }
1933 }
1934 }
1935 #endif /* SKYWALK */
1936 proc_fdunlock(p);
1937
1938 return PROC_RETURNED;
1939 }
1940
1941 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1942 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1943 {
1944 int error = 0;
1945 proc_t targetproc = PROC_NULL;
1946 int pid = args->pid;
1947 int level = args->level;
1948
1949 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1950 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1951 error = EINVAL;
1952 goto out;
1953 }
1954
1955 targetproc = proc_find(pid);
1956 if (targetproc == PROC_NULL) {
1957 error = ESRCH;
1958 goto out;
1959 }
1960
1961 if (!task_for_pid_posix_check(targetproc) &&
1962 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1963 error = EPERM;
1964 goto out;
1965 }
1966
1967 #if CONFIG_MACF
1968 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1969 if (error) {
1970 error = EPERM;
1971 goto out;
1972 }
1973 #endif
1974
1975 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1976 networking_defunct_callout, args, NULL, NULL);
1977
1978 out:
1979 if (targetproc != PROC_NULL) {
1980 proc_rele(targetproc);
1981 }
1982 *ret = error;
1983 return error;
1984 }
1985
1986 #endif /* SOCKETS */
1987
1988 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1989 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1990 __unused int arg2, struct sysctl_req *req)
1991 {
1992 int error = 0;
1993 int new_value;
1994
1995 error = SYSCTL_OUT(req, arg1, sizeof(int));
1996 if (error || req->newptr == USER_ADDR_NULL) {
1997 return error;
1998 }
1999
2000 if (!kauth_cred_issuser(kauth_cred_get())) {
2001 return EPERM;
2002 }
2003
2004 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2005 goto out;
2006 }
2007 if ((new_value == KERN_TFP_POLICY_DENY)
2008 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2009 tfp_policy = new_value;
2010 } else {
2011 error = EINVAL;
2012 }
2013 out:
2014 return error;
2015 }
2016
/* Reports at runtime whether this kernel was built with SECURE_KERNEL. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

/* kern.secure_kernel: read-only build-configuration flag. */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy, writes validated by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: shared-region tunables (variables defined elsewhere). */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2035
2036 /*
2037 * shared_region_check_np:
2038 *
2039 * This system call is intended for dyld.
2040 *
2041 * dyld calls this when any process starts to see if the process's shared
2042 * region is already set up and ready to use.
2043 * This call returns the base address of the first mapping in the
2044 * process's shared region's first mapping.
2045 * dyld will then check what's mapped at that address.
2046 *
2047 * If the shared region is empty, dyld will then attempt to map the shared
2048 * cache file in the shared region via the shared_region_map_np() system call.
2049 *
2050 * If something's already mapped in the shared region, dyld will check if it
2051 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2053 * shared region.
2054 * If it doesn't match, dyld will unmap the shared region and map the shared
2055 * cache into the process's address space via mmap().
2056 *
2057 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2058 * the shared region. We will remove the shared_region reference from the task.
2059 *
2060 * ERROR VALUES
2061 * EINVAL no shared region
2062 * ENOMEM shared region is empty
2063 * EFAULT bad address for "start_address"
2064 */
2065 int
shared_region_check_np(__unused struct proc * p,struct shared_region_check_np_args * uap,__unused int * retvalp)2066 shared_region_check_np(
2067 __unused struct proc *p,
2068 struct shared_region_check_np_args *uap,
2069 __unused int *retvalp)
2070 {
2071 vm_shared_region_t shared_region;
2072 mach_vm_offset_t start_address = 0;
2073 int error = 0;
2074 kern_return_t kr;
2075 task_t task = current_task();
2076
2077 SHARED_REGION_TRACE_DEBUG(
2078 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
2079 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2080 proc_getpid(p), p->p_comm,
2081 (uint64_t)uap->start_address));
2082
2083 /*
2084 * Special value of start_address used to indicate that map_with_linking() should
2085 * no longer be allowed in this process
2086 */
2087 if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
2088 p->p_disallow_map_with_linking = TRUE;
2089 return 0;
2090 }
2091
2092 /* retrieve the current tasks's shared region */
2093 shared_region = vm_shared_region_get(task);
2094 if (shared_region != NULL) {
2095 /*
2096 * A NULL argument is used by dyld to indicate the task
2097 * has unmapped its shared region.
2098 */
2099 if (uap->start_address == 0) {
2100 /* unmap it first */
2101 vm_shared_region_remove(task, shared_region);
2102 vm_shared_region_set(task, NULL);
2103 } else {
2104 /* retrieve address of its first mapping... */
2105 kr = vm_shared_region_start_address(shared_region, &start_address, task);
2106 if (kr != KERN_SUCCESS) {
2107 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2108 "check_np(0x%llx) "
2109 "vm_shared_region_start_address() failed\n",
2110 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2111 proc_getpid(p), p->p_comm,
2112 (uint64_t)uap->start_address));
2113 error = ENOMEM;
2114 } else {
2115 #if __has_feature(ptrauth_calls)
2116 /*
2117 * Remap any section of the shared library that
2118 * has authenticated pointers into private memory.
2119 */
2120 if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
2121 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2122 "check_np(0x%llx) "
2123 "vm_shared_region_auth_remap() failed\n",
2124 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2125 proc_getpid(p), p->p_comm,
2126 (uint64_t)uap->start_address));
2127 error = ENOMEM;
2128 }
2129 #endif /* __has_feature(ptrauth_calls) */
2130
2131 /* ... and give it to the caller */
2132 if (error == 0) {
2133 error = copyout(&start_address,
2134 (user_addr_t) uap->start_address,
2135 sizeof(start_address));
2136 if (error != 0) {
2137 SHARED_REGION_TRACE_ERROR(
2138 ("shared_region: %p [%d(%s)] "
2139 "check_np(0x%llx) "
2140 "copyout(0x%llx) error %d\n",
2141 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2142 proc_getpid(p), p->p_comm,
2143 (uint64_t)uap->start_address, (uint64_t)start_address,
2144 error));
2145 }
2146 }
2147 }
2148 }
2149 vm_shared_region_deallocate(shared_region);
2150 } else {
2151 /* no shared region ! */
2152 error = EINVAL;
2153 }
2154
2155 SHARED_REGION_TRACE_DEBUG(
2156 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
2157 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2158 proc_getpid(p), p->p_comm,
2159 (uint64_t)uap->start_address, (uint64_t)start_address, error));
2160
2161 return error;
2162 }
2163
2164
2165 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2166 shared_region_copyin(
2167 struct proc *p,
2168 user_addr_t user_addr,
2169 unsigned int count,
2170 unsigned int element_size,
2171 void *kernel_data)
2172 {
2173 int error = 0;
2174 vm_size_t size = count * element_size;
2175
2176 error = copyin(user_addr, kernel_data, size);
2177 if (error) {
2178 SHARED_REGION_TRACE_ERROR(
2179 ("shared_region: %p [%d(%s)] map(): "
2180 "copyin(0x%llx, %ld) failed (error=%d)\n",
2181 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2182 proc_getpid(p), p->p_comm,
2183 (uint64_t)user_addr, (long)size, error));
2184 }
2185 return error;
2186 }
2187
/*
 * A reasonable upper limit to prevent overflow of allocation/copyin.
 * Bounds the number of shared-cache files a single map-and-slide call
 * may reference.
 */
#define _SR_FILE_MAPPINGS_MAX_FILES 256

/* forward declaration — defined below, after its callers */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region);
2200
/*
 * Setup part of _shared_region_map_and_slide().
 * It had to be broken out of _shared_region_map_and_slide() to
 * prevent compiler inlining from blowing out the stack.
 *
 * Turns the user-supplied files[]/mappings[] arrays into an
 * _sr_file_mappings array, takes references on each file descriptor and
 * vnode involved, and vets every backing file (regular file, readable,
 * root-owned or in the trust cache, SIP-protected or on an approved
 * volume, located in the expected shared-cache directory, and covered by
 * code signatures).
 *
 * On success: *sr_file_mappings and *shared_region_ptr are set; the
 * caller owns the references they carry and must eventually call
 * shared_region_map_and_slide_cleanup().
 * On error: everything acquired here is released via
 * shared_region_map_and_slide_cleanup() and both out-parameters are
 * set to NULL.
 */
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings,
	struct _sr_file_mappings            **sr_file_mappings,
	struct vm_shared_region             **shared_region_ptr,
	struct vnode                        *rdir_vp)
{
	int                             error = 0;
	struct _sr_file_mappings        *srfmp;
	uint32_t                        mappings_next;
	struct vnode_attr               va;
	off_t                           fs;
#if CONFIG_MACF
	vm_prot_t                       maxprot = VM_PROT_ALL;
#endif
	uint32_t                        i;
	struct vm_shared_region         *shared_region = NULL;
	boolean_t                       is_driverkit = task_is_driver(current_task());

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	/* bound the allocation below and reject degenerate requests */
	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
		error = E2BIG;
		goto done;
	}
	if (files_count == 0) {
		error = EINVAL;
		goto done;
	}
	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
	    Z_WAITOK | Z_ZERO);
	if (*sr_file_mappings == NULL) {
		error = ENOMEM;
		goto done;
	}
	/*
	 * Slice the flat mappings[] array into per-file runs, one run per
	 * entry of files[].
	 */
	mappings_next = 0;
	for (i = 0; i < files_count; i++) {
		srfmp = &(*sr_file_mappings)[i];
		srfmp->fd = files[i].sf_fd;
		srfmp->mappings_count = files[i].sf_mappings_count;
		srfmp->mappings = &mappings[mappings_next];
		/*
		 * NOTE(review): 32-bit accumulation of a userspace-supplied
		 * count; callers bound mappings_count (<= SFM_MAX), so this
		 * relies on the per-iteration check below rejecting any
		 * oversized sf_mappings_count before the next addition —
		 * confirm for all call sites.
		 */
		mappings_next += srfmp->mappings_count;
		if (mappings_next > mappings_count) {
			error = EINVAL;
			goto done;
		}
		srfmp->slide = files[i].sf_slide;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_trim_and_get(current_task());
	*shared_region_ptr = shared_region;
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"no shared region\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		error = EINVAL;
		goto done;
	}

	/*
	 * Check the shared region matches the current root
	 * directory of this process.  Deny the mapping to
	 * avoid tainting the shared region with something that
	 * doesn't quite belong into it.
	 */
	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
	if (sr_vnode != NULL ?  rdir_vp != sr_vnode : rdir_vp != rootvnode) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: map(%p) root_dir mismatch\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread())));
		error = EPERM;
		goto done;
	}


	/* vet each file and take the references the mapping step needs */
	for (srfmp = &(*sr_file_mappings)[0];
	    srfmp < &(*sr_file_mappings)[files_count];
	    srfmp++) {
		if (srfmp->mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * A file descriptor of -1 is used to indicate that the data
		 * to be put in the shared region for this mapping comes directly
		 * from the processes address space.  Ensure we have proper alignments.
		 */
		if (srfmp->fd == -1) {
			/* only allow one mapping per fd */
			if (srfmp->mappings_count > 1) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data >1 mapping\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				error = EINVAL;
				goto done;
			}

			/*
			 * The destination address and size must be page aligned.
			 */
			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
			mach_vm_address_t dest_addr = mapping->sms_address;
			mach_vm_size_t map_size = mapping->sms_size;
			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, dest_addr));
				error = EINVAL;
				goto done;
			}
			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, map_size));
				error = EINVAL;
				goto done;
			}
			/* no file to vet for an fd of -1 */
			continue;
		}

		/* get file structure from file descriptor */
		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d lookup failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}

		/* we need at least read permission on the file */
		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d not readable\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd));
			error = EPERM;
			goto done;
		}

		/* get vnode from file structure (takes an iocount) */
		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d getwithref failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}
		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);

		/* make sure the vnode is a regular file */
		if (srfmp->vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not a file (type=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, srfmp->vp->v_type));
			error = EINVAL;
			goto done;
		}

#if CONFIG_MACF
		/* pass in 0 for the offset argument because AMFI does not need the offset
		 *       of the shared cache */
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
		if (error) {
			goto done;
		}
#endif /* MAC */

#if XNU_TARGET_OS_OSX && defined(__arm64__)
		/*
		 * Check if the shared cache is in the trust cache;
		 * if so, we can skip the root ownership check.
		 */
#if DEVELOPMENT || DEBUG
		/*
		 * Skip both root ownership and trust cache check if
		 * enforcement is disabled.
		 */
		if (!cs_system_enforcement()) {
			goto after_root_check;
		}
#endif /* DEVELOPMENT || DEBUG */
		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
		if (blob == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing CS blob\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		const uint8_t *cdhash = csblob_get_cdhash(blob);
		if (cdhash == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing cdhash\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}

		/*
		 * Only OS/static/engineering trust-cache entries are
		 * accepted in lieu of root ownership.
		 */
		bool in_trust_cache = false;
		TrustCacheQueryToken_t qt;
		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
			TCType_t tc_type = kTCTypeInvalid;
			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
			    (tc_type == kTCTypeCryptex1BootOS ||
			    tc_type == kTCTypeStatic ||
			    tc_type == kTCTypeEngineering));
		}
		if (!in_trust_cache) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not in trust cache\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

		/* The shared cache file must be owned by root */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_getattr(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				error));
			goto done;
		}
		if (va.va_uid != 0) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"owned by uid=%d instead of 0\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, va.va_uid));
			error = EPERM;
			goto done;
		}

#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
			/* SIP is enabled: the file itself must be SIP-protected */
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_flags);
			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"vnode_getattr(%p) failed (error=%d)\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					error));
				goto done;
			}

			if (!(va.va_flags & SF_RESTRICTED)) {
				/*
				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
				 * the shared cache file is NOT SIP-protected, so reject the
				 * mapping request
				 */
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'), "
					"vnode is not SIP-protected. \n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				goto done;
			}
		}
#else /* CONFIG_CSR */

		/*
		 * Devices without SIP/ROSP need to make sure that the shared cache
		 * is either on the root volume or in the preboot cryptex volume.
		 */
		assert(rdir_vp != NULL);
		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
			vnode_t preboot_vp = NULL;
#if XNU_TARGET_OS_OSX
#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
#else
#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
#endif
			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"not on process' root volume nor preboot volume\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				if (preboot_vp) {
					(void)vnode_put(preboot_vp);
				}
				goto done;
			} else if (preboot_vp) {
				(void)vnode_put(preboot_vp);
			}
		}
#endif /* CONFIG_CSR */

		if (scdir_enforce) {
			/*
			 * The file's parent directory must be one of the
			 * expected shared-cache directories for this kind of
			 * task (DriverKit tasks use a separate list).
			 */
			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			struct vnode *scdir_vp = NULL;
			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			    *expected_scdir_path != NULL;
			    expected_scdir_path++) {
				/* get vnode for expected_scdir_path */
				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
				if (error) {
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)]: "
						"vnode_lookup(%s) failed (error=%d)\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						*expected_scdir_path, error));
					continue;
				}

				/* check if parent is scdir_vp */
				assert(scdir_vp != NULL);
				if (vnode_parent(srfmp->vp) == scdir_vp) {
					(void)vnode_put(scdir_vp);
					scdir_vp = NULL;
					goto scdir_ok;
				}
				(void)vnode_put(scdir_vp);
				scdir_vp = NULL;
			}
			/* nothing matches */
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"shared cache file not in expected directory\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EPERM;
			goto done;
		}
scdir_ok:

		/* get vnode size */
		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_size(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
			goto done;
		}
		srfmp->file_size = fs;

		/* get the file's memory object handle */
		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"no memory object\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EINVAL;
			goto done;
		}

		/* check that the mappings are properly covered by code signatures */
		if (!cs_system_enforcement()) {
			/* code signing is not enforced: no need to check */
		} else {
			for (i = 0; i < srfmp->mappings_count; i++) {
				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
					/* zero-filled mapping: not backed by the file */
					continue;
				}
				if (ubc_cs_is_range_codesigned(srfmp->vp,
				    srfmp->mappings[i].sms_file_offset,
				    srfmp->mappings[i].sms_size)) {
					/* this mapping is fully covered by code signatures */
					continue;
				}
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
					"is not code-signed\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					i, srfmp->mappings_count,
					srfmp->mappings[i].sms_address,
					srfmp->mappings[i].sms_size,
					srfmp->mappings[i].sms_file_offset,
					srfmp->mappings[i].sms_max_prot,
					srfmp->mappings[i].sms_init_prot));
				error = EINVAL;
				goto done;
			}
		}
	}
done:
	if (error != 0) {
		/* release everything acquired above and NULL the out-params */
		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
		*sr_file_mappings = NULL;
		*shared_region_ptr = NULL;
	}
	return error;
}
2672
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 *
 * (Common backend for the map-and-slide syscall variants; the kernel
 * arrays passed in have already been copied in and bounds-checked by
 * the caller.)
 */
static int
_shared_region_map_and_slide(
	struct proc                         *p,
	uint32_t                            files_count,
	struct shared_file_np               *files,
	uint32_t                            mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int                             error = 0;
	kern_return_t                   kr = KERN_SUCCESS;
	struct _sr_file_mappings        *sr_file_mappings = NULL;
	struct vnode                    *rdir_vp = NULL;
	struct vm_shared_region         *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	/* take an iocount, released below on every path */
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		/* setup already released everything it had acquired */
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	vnode_put(rdir_vp);
	/* drop the fd/vnode/shared-region references taken by setup */
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
2768
2769 /*
2770 * Clean up part of _shared_region_map_and_slide()
2771 * It had to be broken out of _shared_region_map_and_slide() to
2772 * prevent compiler inlining from blowing out the stack.
2773 */
2774 __attribute__((noinline))
2775 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region)2776 shared_region_map_and_slide_cleanup(
2777 struct proc *p,
2778 uint32_t files_count,
2779 struct _sr_file_mappings *sr_file_mappings,
2780 struct vm_shared_region *shared_region)
2781 {
2782 struct _sr_file_mappings *srfmp;
2783 struct vnode_attr va;
2784
2785 if (sr_file_mappings != NULL) {
2786 for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2787 if (srfmp->vp != NULL) {
2788 vnode_lock_spin(srfmp->vp);
2789 srfmp->vp->v_flag |= VSHARED_DYLD;
2790 vnode_unlock(srfmp->vp);
2791
2792 /* update the vnode's access time */
2793 if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2794 VATTR_INIT(&va);
2795 nanotime(&va.va_access_time);
2796 VATTR_SET_ACTIVE(&va, va_access_time);
2797 vnode_setattr(srfmp->vp, &va, vfs_context_current());
2798 }
2799
2800 #if NAMEDSTREAMS
2801 /*
2802 * If the shared cache is compressed, it may
2803 * have a namedstream vnode instantiated for
2804 * for it. That namedstream vnode will also
2805 * have to be marked with VSHARED_DYLD.
2806 */
2807 if (vnode_hasnamedstreams(srfmp->vp)) {
2808 vnode_t svp;
2809 if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2810 NS_OPEN, 0, vfs_context_kernel()) == 0) {
2811 vnode_lock_spin(svp);
2812 svp->v_flag |= VSHARED_DYLD;
2813 vnode_unlock(svp);
2814 vnode_put(svp);
2815 }
2816 }
2817 #endif /* NAMEDSTREAMS */
2818 /*
2819 * release the vnode...
2820 * ubc_map() still holds it for us in the non-error case
2821 */
2822 (void) vnode_put(srfmp->vp);
2823 srfmp->vp = NULL;
2824 }
2825 if (srfmp->fp != NULL) {
2826 /* release the file descriptor */
2827 fp_drop(p, srfmp->fd, srfmp->fp, 0);
2828 srfmp->fp = NULL;
2829 }
2830 }
2831 kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2832 }
2833
2834 if (shared_region != NULL) {
2835 vm_shared_region_deallocate(shared_region);
2836 }
2837 }
2838
2839
/*
 * For each file mapped, we may have mappings for:
 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
 * so let's round up to 8 mappings per file.
 */
#define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8)       /* max mapping structs allowed to pass in */

/*
 * This is the new interface for setting up shared region mappings.
 *
 * The slide used for shared regions setup using this interface is done differently
 * from the old interface. The slide value passed in the shared_files_np represents
 * a max value. The kernel will choose a random value based on that, then use it
 * for all shared regions.
 */
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK      /* x86_64: slide at 4K granularity */
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK   /* arm64: slide at 16K granularity */
#endif
2860
int
shared_region_map_and_slide_2_np(
	struct proc                                *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int                               *retvalp)
{
	/*
	 * Syscall entry point: copies in the file and mapping arrays from
	 * dyld, picks a single random slide bounded by the caller-supplied
	 * maximum, applies it to every mapping, and hands the result to
	 * _shared_region_map_and_slide().
	 *
	 * NOTE(review): "kr" holds a mix of kern_return_t codes and errno
	 * values (shared_region_copyin/_shared_region_map_and_slide return
	 * errno) and is returned directly as the syscall's errno; this
	 * matches the historical behavior of this call.
	 */
	unsigned int                    files_count;
	struct shared_file_np           *shared_files = NULL;
	unsigned int                    mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t                   kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* bound files_count before sizing the allocation */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	/* bound mappings_count the same way */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * The first file's sf_slide carries the max slide; one random
	 * slide (page-granular via SLIDE_AMOUNT_MASK) is used for all
	 * shared regions.
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			/* guard against sf_mappings_count sums exceeding mappings[] */
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* kfree_data() tolerates NULL pointers from the early-exit paths */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2984
2985 /*
2986 * A syscall for dyld to use to map data pages that need load time relocation fixups.
2987 * The fixups are performed by a custom pager during page-in, so the pages still appear
2988 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
2989 * on demand later, all w/o using the compressor.
2990 *
2991 * Note these page are treated as MAP_PRIVATE. So if the application dirties any pages while
2992 * running, they are COW'd as normal.
2993 */
2994 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)2995 map_with_linking_np(
2996 struct proc *p,
2997 struct map_with_linking_np_args *uap,
2998 __unused int *retvalp)
2999 {
3000 uint32_t region_count;
3001 uint32_t r;
3002 struct mwl_region *regions = NULL;
3003 struct mwl_region *rp;
3004 uint32_t link_info_size;
3005 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
3006 struct mwl_info_hdr *info_hdr = NULL;
3007 uint64_t binds_size;
3008 int fd;
3009 struct fileproc *fp = NULL;
3010 struct vnode *vp = NULL;
3011 size_t file_size;
3012 off_t fs;
3013 struct vnode_attr va;
3014 memory_object_control_t file_control = NULL;
3015 int error;
3016 kern_return_t kr = KERN_SUCCESS;
3017
3018 /*
3019 * Check if dyld has told us it finished with this call.
3020 */
3021 if (p->p_disallow_map_with_linking) {
3022 printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3023 __func__, proc_getpid(p), p->p_comm);
3024 kr = KERN_FAILURE;
3025 goto done;
3026 }
3027
3028 /*
3029 * First we do some sanity checking on what dyld has passed us.
3030 */
3031 region_count = uap->region_count;
3032 link_info_size = uap->link_info_size;
3033 if (region_count == 0) {
3034 printf("%s: [%d(%s)]: region_count == 0\n",
3035 __func__, proc_getpid(p), p->p_comm);
3036 kr = KERN_FAILURE;
3037 goto done;
3038 }
3039 if (region_count > MWL_MAX_REGION_COUNT) {
3040 printf("%s: [%d(%s)]: region_count too big %d\n",
3041 __func__, proc_getpid(p), p->p_comm, region_count);
3042 kr = KERN_FAILURE;
3043 goto done;
3044 }
3045
3046 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3047 printf("%s: [%d(%s)]: link_info_size too small\n",
3048 __func__, proc_getpid(p), p->p_comm);
3049 kr = KERN_FAILURE;
3050 goto done;
3051 }
3052 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3053 printf("%s: [%d(%s)]: link_info_size too big %d\n",
3054 __func__, proc_getpid(p), p->p_comm, link_info_size);
3055 kr = KERN_FAILURE;
3056 goto done;
3057 }
3058
3059 /*
3060 * Allocate and copyin the regions and link info
3061 */
3062 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3063 if (regions == NULL) {
3064 printf("%s: [%d(%s)]: failed to allocate regions\n",
3065 __func__, proc_getpid(p), p->p_comm);
3066 kr = KERN_RESOURCE_SHORTAGE;
3067 goto done;
3068 }
3069 kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3070 if (kr != KERN_SUCCESS) {
3071 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3072 __func__, proc_getpid(p), p->p_comm, kr);
3073 goto done;
3074 }
3075
3076 link_info = kalloc_data(link_info_size, Z_WAITOK);
3077 if (link_info == NULL) {
3078 printf("%s: [%d(%s)]: failed to allocate link_info\n",
3079 __func__, proc_getpid(p), p->p_comm);
3080 kr = KERN_RESOURCE_SHORTAGE;
3081 goto done;
3082 }
3083 kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3084 if (kr != KERN_SUCCESS) {
3085 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3086 __func__, proc_getpid(p), p->p_comm, kr);
3087 goto done;
3088 }
3089
3090 /*
3091 * Do some verification the data structures.
3092 */
3093 info_hdr = (struct mwl_info_hdr *)link_info;
3094 if (info_hdr->mwli_version != MWL_INFO_VERS) {
3095 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3096 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3097 kr = KERN_FAILURE;
3098 goto done;
3099 }
3100
3101 if (info_hdr->mwli_binds_offset > link_info_size) {
3102 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3103 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3104 kr = KERN_FAILURE;
3105 goto done;
3106 }
3107
3108 /* some older devs have s/w page size > h/w page size, no need to support them */
3109 if (info_hdr->mwli_page_size != PAGE_SIZE) {
3110 /* no printf, since this is expected on some devices */
3111 kr = KERN_INVALID_ARGUMENT;
3112 goto done;
3113 }
3114
3115 binds_size = (uint64_t)info_hdr->mwli_binds_count *
3116 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3117 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3118 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3119 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3120 kr = KERN_FAILURE;
3121 goto done;
3122 }
3123
3124 if (info_hdr->mwli_chains_offset > link_info_size) {
3125 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3126 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3127 kr = KERN_FAILURE;
3128 goto done;
3129 }
3130
3131
3132 /*
3133 * Ensure the chained starts in the link info and make sure the
3134 * segment info offsets are within bounds.
3135 */
3136 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3137 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3138 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3139 kr = KERN_FAILURE;
3140 goto done;
3141 }
3142 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3143 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3144 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3145 kr = KERN_FAILURE;
3146 goto done;
3147 }
3148
3149 /* Note that more verification of offsets is done in the pager itself */
3150
3151 /*
3152 * Ensure we've only been given one FD and verify valid protections.
3153 */
3154 fd = regions[0].mwlr_fd;
3155 for (r = 0; r < region_count; ++r) {
3156 if (regions[r].mwlr_fd != fd) {
3157 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3158 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3159 kr = KERN_FAILURE;
3160 goto done;
3161 }
3162 regions[r].mwlr_protections &= VM_PROT_ALL;
3163 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3164 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3165 __func__, proc_getpid(p), p->p_comm);
3166 kr = KERN_FAILURE;
3167 goto done;
3168 }
3169 }
3170
3171
3172 /* get file structure from file descriptor */
3173 error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3174 if (error) {
3175 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3176 __func__, proc_getpid(p), p->p_comm, error);
3177 kr = KERN_FAILURE;
3178 goto done;
3179 }
3180
3181 /* We need at least read permission on the file */
3182 if (!(fp->fp_glob->fg_flag & FREAD)) {
3183 printf("%s: [%d(%s)]: not readable\n",
3184 __func__, proc_getpid(p), p->p_comm);
3185 kr = KERN_FAILURE;
3186 goto done;
3187 }
3188
3189 /* Get the vnode from file structure */
3190 vp = (struct vnode *)fp_get_data(fp);
3191 error = vnode_getwithref(vp);
3192 if (error) {
3193 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3194 __func__, proc_getpid(p), p->p_comm, error);
3195 kr = KERN_FAILURE;
3196 vp = NULL; /* just to be sure */
3197 goto done;
3198 }
3199
3200 /* Make sure the vnode is a regular file */
3201 if (vp->v_type != VREG) {
3202 printf("%s: [%d(%s)]: vnode not VREG\n",
3203 __func__, proc_getpid(p), p->p_comm);
3204 kr = KERN_FAILURE;
3205 goto done;
3206 }
3207
3208 /* get vnode size */
3209 error = vnode_size(vp, &fs, vfs_context_current());
3210 if (error) {
3211 goto done;
3212 }
3213 file_size = fs;
3214
3215 /* get the file's memory object handle */
3216 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3217 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3218 printf("%s: [%d(%s)]: no memory object\n",
3219 __func__, proc_getpid(p), p->p_comm);
3220 kr = KERN_FAILURE;
3221 goto done;
3222 }
3223
3224 for (r = 0; r < region_count; ++r) {
3225 rp = ®ions[r];
3226
3227 /*
3228 * Only allow data mappings and not zero fill.
3229 */
3230 if (rp->mwlr_protections & VM_PROT_ZF) {
3231 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF\n",
3232 __func__, proc_getpid(p), p->p_comm, r);
3233 kr = KERN_FAILURE;
3234 goto done;
3235 }
3236 if (rp->mwlr_protections & VM_PROT_EXECUTE) {
3237 printf("%s: [%d(%s)]: region %d, found VM_PROT_EXECUTE\n",
3238 __func__, proc_getpid(p), p->p_comm, r);
3239 kr = KERN_FAILURE;
3240 goto done;
3241 }
3242
3243 #if CONFIG_MACF
3244 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3245 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3246 fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3247 if (error) {
3248 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3249 __func__, proc_getpid(p), p->p_comm, r, error);
3250 kr = KERN_FAILURE;
3251 goto done;
3252 }
3253 #endif /* MAC */
3254
3255 /* check that the mappings are properly covered by code signatures */
3256 if (cs_system_enforcement()) {
3257 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3258 printf("%s: [%d(%s)]: region %d, not code signed\n",
3259 __func__, proc_getpid(p), p->p_comm, r);
3260 kr = KERN_FAILURE;
3261 goto done;
3262 }
3263 }
3264 }
3265
3266 /* update the vnode's access time */
3267 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3268 VATTR_INIT(&va);
3269 nanotime(&va.va_access_time);
3270 VATTR_SET_ACTIVE(&va, va_access_time);
3271 vnode_setattr(vp, &va, vfs_context_current());
3272 }
3273
3274 /* get the VM to do the work */
3275 kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3276
3277 done:
3278 if (fp != NULL) {
3279 /* release the file descriptor */
3280 fp_drop(p, fd, fp, 0);
3281 }
3282 if (vp != NULL) {
3283 (void)vnode_put(vp);
3284 }
3285 if (regions != NULL) {
3286 kfree_data(regions, region_count * sizeof(regions[0]));
3287 }
3288 /* link info is used in the pager if things worked */
3289 if (link_info != NULL && kr != KERN_SUCCESS) {
3290 kfree_data(link_info, link_info_size);
3291 }
3292
3293 switch (kr) {
3294 case KERN_SUCCESS:
3295 return 0;
3296 case KERN_RESOURCE_SHORTAGE:
3297 return ENOMEM;
3298 default:
3299 return EINVAL;
3300 }
3301 }
3302
#if DEBUG || DEVELOPMENT
/* Read-only counters for the dyld pager: current count and high-water mark. */
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

/* The VM page size in bytes, exported read-only as vm.pagesize. */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

/* Memory-pressure indicator maintained by the pageout state machine. */
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3324
3325 static int
3326 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3327 {
3328 #pragma unused(oidp, arg1, arg2)
3329 unsigned int page_free_wanted;
3330
3331 page_free_wanted = mach_vm_ctl_page_free_wanted();
3332 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3333 }
/* Computed on each read via vm_ctl_page_free_wanted(). */
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

/* Counts of purgeable (volatile, reclaim-on-demand) pages. */
extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");
3349
#if DEVELOPMENT || DEBUG
/* Whether this kernel was built with mixed-page-size support. */
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

/* Debug-only pageout statistics. */
SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */
3384
/*
 * Debug knobs: force (always, or probabilistically) zero-filling of pages
 * released through madvise(MADV_FREE*).
 */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");

/* Statistics for the VM "reusable pages" machinery (vm_page_stats_reusable). */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3425
/* Basic page-queue counters, exported read-only. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

/* Shared-cache and realtime-thread page protection / reclamation stats. */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3461
#if defined (__x86_64__)
/* x86_64 free-page "clump" allocator tuning and statistics. */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
/* Per-clump-size allocation counts; index N = allocations from a clump of N pages. */
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */
3490
#if CONFIG_SECLUDED_MEMORY

/* Secluded-memory pool: targets, occupancy, and grab success/failure stats. */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3517
3518 #include <kern/thread.h>
3519 #include <sys/user.h>
3520
3521 void vm_pageout_io_throttle(void);
3522
3523 void
vm_pageout_io_throttle(void)3524 vm_pageout_io_throttle(void)
3525 {
3526 struct uthread *uthread = current_uthread();
3527
3528 /*
3529 * thread is marked as a low priority I/O type
3530 * and the I/O we issued while in this cleaning operation
3531 * collided with normal I/O operations... we'll
3532 * delay in order to mitigate the impact of this
3533 * task on the normal operation of the system
3534 */
3535
3536 if (uthread->uu_lowpri_window) {
3537 throttle_lowpri_io(1);
3538 }
3539 }
3540
3541 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3542 vm_pressure_monitor(
3543 __unused struct proc *p,
3544 struct vm_pressure_monitor_args *uap,
3545 int *retval)
3546 {
3547 kern_return_t kr;
3548 uint32_t pages_reclaimed;
3549 uint32_t pages_wanted;
3550
3551 kr = mach_vm_pressure_monitor(
3552 (boolean_t) uap->wait_for_pressure,
3553 uap->nsecs_monitored,
3554 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3555 &pages_wanted);
3556
3557 switch (kr) {
3558 case KERN_SUCCESS:
3559 break;
3560 case KERN_ABORTED:
3561 return EINTR;
3562 default:
3563 return EINVAL;
3564 }
3565
3566 if (uap->pages_reclaimed) {
3567 if (copyout((void *)&pages_reclaimed,
3568 uap->pages_reclaimed,
3569 sizeof(pages_reclaimed)) != 0) {
3570 return EFAULT;
3571 }
3572 }
3573
3574 *retval = (int) pages_wanted;
3575 return 0;
3576 }
3577
3578 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3579 kas_info(struct proc *p,
3580 struct kas_info_args *uap,
3581 int *retval __unused)
3582 {
3583 #ifndef CONFIG_KAS_INFO
3584 (void)p;
3585 (void)uap;
3586 return ENOTSUP;
3587 #else /* CONFIG_KAS_INFO */
3588 int selector = uap->selector;
3589 user_addr_t valuep = uap->value;
3590 user_addr_t sizep = uap->size;
3591 user_size_t size, rsize;
3592 int error;
3593
3594 if (!kauth_cred_issuser(kauth_cred_get())) {
3595 return EPERM;
3596 }
3597
3598 #if CONFIG_MACF
3599 error = mac_system_check_kas_info(kauth_cred_get(), selector);
3600 if (error) {
3601 return error;
3602 }
3603 #endif
3604
3605 if (IS_64BIT_PROCESS(p)) {
3606 user64_size_t size64;
3607 error = copyin(sizep, &size64, sizeof(size64));
3608 size = (user_size_t)size64;
3609 } else {
3610 user32_size_t size32;
3611 error = copyin(sizep, &size32, sizeof(size32));
3612 size = (user_size_t)size32;
3613 }
3614 if (error) {
3615 return error;
3616 }
3617
3618 switch (selector) {
3619 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3620 {
3621 uint64_t slide = vm_kernel_slide;
3622
3623 if (sizeof(slide) != size) {
3624 return EINVAL;
3625 }
3626
3627 error = copyout(&slide, valuep, sizeof(slide));
3628 if (error) {
3629 return error;
3630 }
3631 rsize = size;
3632 }
3633 break;
3634 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3635 {
3636 uint32_t i;
3637 kernel_mach_header_t *mh = &_mh_execute_header;
3638 struct load_command *cmd;
3639 cmd = (struct load_command*) &mh[1];
3640 uint64_t *bases;
3641 rsize = mh->ncmds * sizeof(uint64_t);
3642
3643 /*
3644 * Return the size if no data was passed
3645 */
3646 if (valuep == 0) {
3647 break;
3648 }
3649
3650 if (rsize > size) {
3651 return EINVAL;
3652 }
3653
3654 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3655
3656 for (i = 0; i < mh->ncmds; i++) {
3657 if (cmd->cmd == LC_SEGMENT_KERNEL) {
3658 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3659 bases[i] = (uint64_t)sg->vmaddr;
3660 }
3661 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3662 }
3663
3664 error = copyout(bases, valuep, rsize);
3665
3666 kfree_data(bases, rsize);
3667
3668 if (error) {
3669 return error;
3670 }
3671 }
3672 break;
3673 default:
3674 return EINVAL;
3675 }
3676
3677 if (IS_64BIT_PROCESS(p)) {
3678 user64_size_t size64 = (user64_size_t)rsize;
3679 error = copyout(&size64, sizep, sizeof(size64));
3680 } else {
3681 user32_size_t size32 = (user32_size_t)rsize;
3682 error = copyout(&size32, sizep, sizeof(size32));
3683 }
3684
3685 return error;
3686 #endif /* CONFIG_KAS_INFO */
3687 }
3688
3689 #if __has_feature(ptrauth_calls)
3690 /*
3691 * Generate a random pointer signing key that isn't 0.
3692 */
3693 uint64_t
generate_jop_key(void)3694 generate_jop_key(void)
3695 {
3696 uint64_t key;
3697
3698 do {
3699 read_random(&key, sizeof key);
3700 } while (key == 0);
3701 return key;
3702 }
3703 #endif /* __has_feature(ptrauth_calls) */
3704
3705
3706 #pragma clang diagnostic push
3707 #pragma clang diagnostic ignored "-Wcast-qual"
3708 #pragma clang diagnostic ignored "-Wunused-function"
3709
3710 static void
asserts()3711 asserts()
3712 {
3713 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3714 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3715 }
3716
/* Kernel VA bounds; the casts are validated by the static asserts above. */
SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

/* Counts of tainted pages detected in UPL / IOPL paths. */
extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");

#if __arm64__ && (DEVELOPMENT || DEBUG)
/* Gate for the footprint_suspend sysctl handler below. */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
3736
3737 extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3738 static int
3739 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3740 {
3741 #pragma unused(oidp, arg1, arg2)
3742 int error = 0;
3743 int new_value;
3744
3745 if (req->newptr == USER_ADDR_NULL) {
3746 return 0;
3747 }
3748 error = SYSCTL_IN(req, &new_value, sizeof(int));
3749 if (error) {
3750 return error;
3751 }
3752 if (!vm_footprint_suspend_allowed) {
3753 if (new_value != 0) {
3754 /* suspends are not allowed... */
3755 return 0;
3756 }
3757 /* ... but let resumes proceed */
3758 }
3759 DTRACE_VM2(footprint_suspend,
3760 vm_map_t, current_map(),
3761 int, new_value);
3762
3763 pmap_footprint_suspend(current_map(), new_value);
3764
3765 return 0;
3766 }
3767 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
3768 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3769 0, 0, &sysctl_vm_footprint_suspend, "I", "");
3770 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3771
3772 extern uint64_t vm_map_corpse_footprint_count;
3773 extern uint64_t vm_map_corpse_footprint_size_avg;
3774 extern uint64_t vm_map_corpse_footprint_size_max;
3775 extern uint64_t vm_map_corpse_footprint_full;
3776 extern uint64_t vm_map_corpse_footprint_no_buf;
3777 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
3778 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
3779 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
3780 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
3781 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
3782 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
3783 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
3784 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
3785 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
3786 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3787
3788 #if CODE_SIGNING_MONITOR
3789 extern uint64_t vm_cs_defer_to_csm;
3790 extern uint64_t vm_cs_defer_to_csm_not;
3791 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
3792 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
3793 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
3794 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
3795 #endif /* CODE_SIGNING_MONITOR */
3796
3797 extern uint64_t shared_region_pager_copied;
3798 extern uint64_t shared_region_pager_slid;
3799 extern uint64_t shared_region_pager_slid_error;
3800 extern uint64_t shared_region_pager_reclaimed;
3801 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
3802 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
3803 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
3804 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
3805 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
3806 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
3807 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
3808 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
3809 extern int shared_region_destroy_delay;
3810 SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
3811 CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3812
3813 #if MACH_ASSERT
3814 extern int pmap_ledgers_panic_leeway;
3815 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
3816 #endif /* MACH_ASSERT */
3817
3818
/*
 * Per-strategy statistics from vm_map_lookup_and_lock_object()'s copy
 * paths (read-only): for each of the "slowly", "strategically" and
 * "shadow" paths there is an event count, a cumulative size, and a
 * maximum; the first two paths also count restarts and errors.
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");

/* writable policy switch; exact semantics at the variable's definition */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
/* read-only counter; name suggests pages copied on read faults */
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
3865
/* current and peak shared-region counts (read-only) */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
/* shared-region pager population counters, exposed on dev/debug builds only */
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* pointer-authentication-specific shared-region counters (arm64e) */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* writable debug4k (4K-page debugging) knobs; assert builds only */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3902
/*
 * Read-only counters for VM map size/data limit configuration and for
 * vm_map_enter() failures attributed to RLIMIT_AS / RLIMIT_DATA.
 */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/*
 * Read-only counters for resilient-media fault handling outcomes;
 * names describe the phase (initiate/retry/proceed/release/abort).
 */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/*
 * Resilient-media error-injection controls (assert builds only):
 * the *_rate knobs are writable; the matching counters are read-only.
 */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

/* read-only retry counter for pmap_query_page_info() */
extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3941
/*
 * A sysctl which causes all existing shared regions to become stale. They
 * will no longer be used by anything new and will be torn down as soon as
 * the last existing user exits. Any successful write triggers the pivot;
 * the value written is not examined by the handler.
 * This should only be used by launchd, so we check that this is initproc.
 */
3948 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3949 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3950 {
3951 unsigned int value = 0;
3952 int changed = 0;
3953 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3954 if (error || !changed) {
3955 return error;
3956 }
3957 if (current_proc() != initproc) {
3958 return EPERM;
3959 }
3960
3961 vm_shared_region_pivot();
3962
3963 return 0;
3964 }
3965
/* write-only trigger; the handler itself enforces the initproc check */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* read-only counters for forced vs. skipped vm_object shadow creation */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
3979
3980
#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self healing. Debug/development builds only.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3987 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3988 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3989 {
3990 uint64_t value = 0;
3991 int error = sysctl_handle_quad(oidp, &value, 0, req);
3992 if (error || !req->newptr) {
3993 return error;
3994 }
3995
3996 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3997 return 0;
3998 } else {
3999 return EINVAL;
4000 }
4001 }
4002
/* write-only test hook; CTLFLAG_MASKED keeps it out of default listings */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
4007
#if DEBUG || DEVELOPMENT
#if CONFIG_MAP_RANGES
/* Test-only handlers exposing the current VM map's user address ranges. */
4010 static int
4011 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4012 {
4013 #pragma unused(arg1, arg2, oidp)
4014 struct mach_vm_range range;
4015
4016 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4017 != KERN_SUCCESS) {
4018 return EINVAL;
4019 }
4020
4021 return SYSCTL_OUT(req, &range, sizeof(range));
4022 }
4023
4024 static int
4025 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4026 {
4027 #pragma unused(arg1, arg2, oidp)
4028 struct mach_vm_range range;
4029
4030 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4031 != KERN_SUCCESS) {
4032 return EINVAL;
4033 }
4034
4035 return SYSCTL_OUT(req, &range, sizeof(range));
4036 }
4037
/*
 * A sysctl that can be used to return ranges for the current VM map.
 * Used for testing VM ranges.
 */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");

#endif /* CONFIG_MAP_RANGES */
#endif /* DEBUG || DEVELOPMENT */

#if DEBUG || DEVELOPMENT
/* currently empty: placeholder for future debug/development-only sysctls */
#endif /* DEBUG || DEVELOPMENT */
4052
/*
 * Compressor segment-fill statistics (read-only): fills with and
 * without contention, plus the longest observed contention time
 * (seconds + nanoseconds, exported as two separate OIDs).
 */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/*
 * Compressor major-compaction controls and statistics (macOS/arm64):
 * the first two are writable tuning knobs; the rest are read-only
 * reporting counters and maxima.
 */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
/* writable by any user (CTLFLAG_ANYBODY); presumably gates a panic on
 * use of a not-alive VM object — confirm at the variable's definition */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* writable by any user; name suggests it suppresses certain debug panics —
 * confirm at the variable's definition (assert builds only) */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4085