1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #include <sys/code_signing.h>
86 #if NECP
87 #include <net/necp.h>
88 #endif /* NECP */
89 #if SKYWALK
90 #include <skywalk/os_channel.h>
91 #endif /* SKYWALK */
92
93 #include <security/audit/audit.h>
94 #include <security/mac.h>
95 #include <bsm/audit_kevents.h>
96
97 #include <kern/kalloc.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_kern.h>
100 #include <vm/vm_pageout.h>
101
102 #include <mach/shared_region.h>
103 #include <vm/vm_shared_region.h>
104
105 #include <vm/vm_dyld_pager.h>
106
107 #include <vm/vm_protos.h>
108
109 #include <sys/kern_memorystatus.h>
110 #include <sys/kern_memorystatus_freeze.h>
111 #include <sys/proc_internal.h>
112
113 #include <mach-o/fixup-chains.h>
114
115 #if CONFIG_MACF
116 #include <security/mac_framework.h>
117 #endif
118
119 #include <kern/bits.h>
120
121 #if CONFIG_CSR
122 #include <sys/csr.h>
123 #endif /* CONFIG_CSR */
124 #include <sys/trust_caches.h>
125 #include <libkern/amfi/amfi.h>
126 #include <IOKit/IOBSD.h>
127
/* Debug toggles for VM map diagnostics; each is compiled in only when the
 * corresponding build-time feature is configured. */
#if VM_MAP_DEBUG_APPLE_PROTECT
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
135
136 #if DEVELOPMENT || DEBUG
137
138 static int
139 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
140 {
141 #pragma unused(arg1, arg2)
142 vm_offset_t kaddr;
143 kern_return_t kr;
144 int error = 0;
145 int size = 0;
146
147 error = sysctl_handle_int(oidp, &size, 0, req);
148 if (error || !req->newptr) {
149 return error;
150 }
151
152 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
153 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
154
155 if (kr == KERN_SUCCESS) {
156 kmem_free(kernel_map, kaddr, size);
157 }
158
159 return error;
160 }
161
/* vm.kmem_alloc_contig: write a size to exercise contiguous allocation. */
SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");

/* vm.region_footprint: global toggle read by the vm_region footprint code. */
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
167
168 static int
169 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
170 {
171 #pragma unused(arg1, arg2, oidp)
172 kmem_gobj_stats stats = kmem_get_gobj_stats();
173
174 return SYSCTL_OUT(req, &stats, sizeof(stats));
175 }
176
/* vm.sysctl_kmem_gobj_stats: struct snapshot, readable on DEV/DEBUG only. */
SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");

#endif /* DEVELOPMENT || DEBUG */
182
183 static int
184 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
185 {
186 #pragma unused(arg1, arg2, oidp)
187 int error = 0;
188 int value;
189
190 value = task_self_region_footprint();
191 error = SYSCTL_OUT(req, &value, sizeof(int));
192 if (error) {
193 return error;
194 }
195
196 if (!req->newptr) {
197 return 0;
198 }
199
200 error = SYSCTL_IN(req, &value, sizeof(int));
201 if (error) {
202 return error;
203 }
204 task_self_region_footprint_set(value);
205 return 0;
206 }
207 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
208
209 static int
210 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
211 {
212 #pragma unused(arg1, arg2, oidp)
213 int error = 0;
214 int value;
215
216 value = (1 << thread_self_region_page_shift());
217 error = SYSCTL_OUT(req, &value, sizeof(int));
218 if (error) {
219 return error;
220 }
221
222 if (!req->newptr) {
223 return 0;
224 }
225
226 error = SYSCTL_IN(req, &value, sizeof(int));
227 if (error) {
228 return error;
229 }
230
231 if (value != 0 && value != 4096 && value != 16384) {
232 return EINVAL;
233 }
234
235 #if !__ARM_MIXED_PAGE_SIZE__
236 if (value != vm_map_page_size(current_map())) {
237 return EINVAL;
238 }
239 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
240
241 thread_self_region_page_shift_set(bit_first(value));
242 return 0;
243 }
244 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
245
246
#if DEVELOPMENT || DEBUG
/* When set, an unsigned-execute fault panics instead of failing the fault. */
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");

extern int vm_log_xnu_user_debug;
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
#endif /* DEVELOPMENT || DEBUG */

/* Read-only counters: UPL-creation against / wiring of code-signed
 * executable memory. */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

/* apple_protect pager counts and its (tunable) cache limit. */
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");

#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
/* Counters for translation-table pages currently sitting on free lists. */
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#if DEVELOPMENT || DEBUG
/* pmap ASID statistics (presumably TLB flush/hit/miss counts — confirm in pmap). */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */

/* vm_object collapse / copy-on-write counters from the global vm_counters. */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;   /* Disabled by default */
extern int proc_shadow_max(void);
331 static int
332 vm_shadow_max SYSCTL_HANDLER_ARGS
333 {
334 #pragma unused(arg1, arg2, oidp)
335 int value = 0;
336
337 if (vm_shadow_max_enabled) {
338 value = proc_shadow_max();
339 }
340
341 return SYSCTL_OUT(req, &value, sizeof(value));
342 }
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/* Forward declaration; defined below.  The loud name makes the blocking
 * upcall to the task access server recognizable in stackshots/spindumps. */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */
365
/*
 * Human-readable names for VM protection combinations, indexed by
 * (prot & VM_PROT_ALL), i.e. the 3-bit read/write/execute mask.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
376
377 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)378 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
379 {
380 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
381 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
382 }
383
384 /*
385 * shared_region_unnest_logging: level of logging of unnesting events
386 * 0 - no logging
387 * 1 - throttled logging of unexpected unnesting events (default)
388 * 2 - unthrottled logging of unexpected unnesting events
389 * 3+ - unthrottled logging of all unnesting events
390 */
/* Current logging level; see the legend in the comment above. */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

/* Throttle parameters for level 1: allow a burst of up to
 * log_count_threshold messages per log_interval seconds. */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
398
399
#if XNU_TARGET_OS_OSX

/* Whether to restrict shared-cache mapping to the directories below
 * (toggled by the vm.enforce_shared_cache_dir sysctl further down). */
#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

/* Permitted shared-cache directories on macOS; NULL-terminated. */
static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
/* Permitted shared-cache directories on embedded platforms; NULL-terminated. */
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Permitted shared-cache directories for DriverKit; NULL-terminated. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
437
#ifndef SECURE_KERNEL
/*
 * Handler for vm.enforce_shared_cache_dir.  On CONFIG_CSR kernels the
 * value may only be changed when SIP permits unrestricted filesystem
 * access; otherwise the write is rejected (and logged) with EPERM.
 */
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}

SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif
452
/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

/*
 * Report an unnest of the range [s, e) of the shared region in map "m".
 * Behavior is controlled by shared_region_unnest_logging (legend above):
 * level 0 logs nothing; levels <= 2 skip "expected" unnests (writable
 * entries in a nested map); level 1 additionally rate-limits the output.
 */
void
log_unnest_badness(
	vm_map_t m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval tv;

	if (shared_region_unnest_logging == 0) {
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	if (shared_region_unnest_logging <= 1) {
		/* Level 1: allow a burst of log_count_threshold messages
		 * per log_interval seconds, then go quiet until the
		 * interval rolls over. */
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
503
504 uint64_t
vm_purge_filebacked_pagers(void)505 vm_purge_filebacked_pagers(void)
506 {
507 uint64_t pages_purged;
508
509 pages_purged = 0;
510 pages_purged += apple_protect_pager_purge_all();
511 pages_purged += shared_region_pager_purge_all();
512 pages_purged += dyld_pager_purge_all();
513 #if DEVELOPMENT || DEBUG
514 printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
515 #endif /* DEVELOPMENT || DEBUG */
516 return pages_purged;
517 }
518
519 int
useracc(user_addr_t addr,user_size_t len,int prot)520 useracc(
521 user_addr_t addr,
522 user_size_t len,
523 int prot)
524 {
525 vm_map_t map;
526
527 map = current_map();
528 return vm_map_check_protection(
529 map,
530 vm_map_trunc_page(addr,
531 vm_map_page_mask(map)),
532 vm_map_round_page(addr + len,
533 vm_map_page_mask(map)),
534 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
535 }
536
537 int
vslock(user_addr_t addr,user_size_t len)538 vslock(
539 user_addr_t addr,
540 user_size_t len)
541 {
542 kern_return_t kret;
543 vm_map_t map;
544
545 map = current_map();
546 kret = vm_map_wire_kernel(map,
547 vm_map_trunc_page(addr,
548 vm_map_page_mask(map)),
549 vm_map_round_page(addr + len,
550 vm_map_page_mask(map)),
551 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
552 FALSE);
553
554 switch (kret) {
555 case KERN_SUCCESS:
556 return 0;
557 case KERN_INVALID_ADDRESS:
558 case KERN_NO_SPACE:
559 return ENOMEM;
560 case KERN_PROTECTION_FAILURE:
561 return EACCES;
562 default:
563 return EINVAL;
564 }
565 }
566
/*
 * vsunlock: counterpart of vslock(); unwire [addr, addr+len) in the
 * current map.  "dirtied" is currently unused: the code that would mark
 * the pages modified before unwiring is compiled out under FIXME.
 * Returns 0 on success or a BSD errno translated from the Mach status.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t pmap;
	vm_page_t pg;
	vm_map_offset_t vaddr;
	ppnum_t paddr;
#endif  /* FIXME ] */
	kern_return_t kret;
	vm_map_t map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* Translate the Mach status to a BSD errno, as in vslock(). */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
617
618 int
subyte(user_addr_t addr,int byte)619 subyte(
620 user_addr_t addr,
621 int byte)
622 {
623 char character;
624
625 character = (char)byte;
626 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
627 }
628
629 int
suibyte(user_addr_t addr,int byte)630 suibyte(
631 user_addr_t addr,
632 int byte)
633 {
634 char character;
635
636 character = (char)byte;
637 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
638 }
639
640 int
fubyte(user_addr_t addr)641 fubyte(user_addr_t addr)
642 {
643 unsigned char byte;
644
645 if (copyin(addr, (void *) &byte, sizeof(char))) {
646 return -1;
647 }
648 return byte;
649 }
650
651 int
fuibyte(user_addr_t addr)652 fuibyte(user_addr_t addr)
653 {
654 unsigned char byte;
655
656 if (copyin(addr, (void *) &(byte), sizeof(char))) {
657 return -1;
658 }
659 return byte;
660 }
661
662 int
suword(user_addr_t addr,long word)663 suword(
664 user_addr_t addr,
665 long word)
666 {
667 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
668 }
669
670 long
fuword(user_addr_t addr)671 fuword(user_addr_t addr)
672 {
673 long word = 0;
674
675 if (copyin(addr, (void *) &word, sizeof(int))) {
676 return -1;
677 }
678 return word;
679 }
680
681 /* suiword and fuiword are the same as suword and fuword, respectively */
682
683 int
suiword(user_addr_t addr,long word)684 suiword(
685 user_addr_t addr,
686 long word)
687 {
688 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
689 }
690
691 long
fuiword(user_addr_t addr)692 fuiword(user_addr_t addr)
693 {
694 long word = 0;
695
696 if (copyin(addr, (void *) &word, sizeof(int))) {
697 return -1;
698 }
699 return word;
700 }
701
702 /*
703 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
704 * fetching and setting of process-sized size_t and pointer values.
705 */
706 int
sulong(user_addr_t addr,int64_t word)707 sulong(user_addr_t addr, int64_t word)
708 {
709 if (IS_64BIT_PROCESS(current_proc())) {
710 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
711 } else {
712 return suiword(addr, (long)word);
713 }
714 }
715
716 int64_t
fulong(user_addr_t addr)717 fulong(user_addr_t addr)
718 {
719 int64_t longword;
720
721 if (IS_64BIT_PROCESS(current_proc())) {
722 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
723 return -1;
724 }
725 return longword;
726 } else {
727 return (int64_t)fuiword(addr);
728 }
729 }
730
731 int
suulong(user_addr_t addr,uint64_t uword)732 suulong(user_addr_t addr, uint64_t uword)
733 {
734 if (IS_64BIT_PROCESS(current_proc())) {
735 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
736 } else {
737 return suiword(addr, (uint32_t)uword);
738 }
739 }
740
741 uint64_t
fuulong(user_addr_t addr)742 fuulong(user_addr_t addr)
743 {
744 uint64_t ulongword;
745
746 if (IS_64BIT_PROCESS(current_proc())) {
747 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
748 return -1ULL;
749 }
750 return ulongword;
751 } else {
752 return (uint64_t)fuiword(addr);
753 }
754 }
755
/* swapon(2) is not supported; always fails with ENOTSUP. */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}
761
762 /*
763 * pid_for_task
764 *
765 * Find the BSD process ID for the Mach task associated with the given Mach port
766 * name
767 *
768 * Parameters: args User argument descriptor (see below)
769 *
770 * Indirect parameters: args->t Mach port name
771 * args->pid Process ID (returned value; see below)
772 *
 * Returns:	KERN_SUCCESS			Success
774 * KERN_FAILURE Not success
775 *
776 * Implicit returns: args->pid Process ID
777 *
778 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t t = args->t;
	user_addr_t pid_addr = args->pid;
	proc_t p;
	task_t t1;
	int pid = -1;
	kern_return_t err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Resolve the port name to a task; takes a task reference on success. */
	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (task_is_a_corpse(t1)) {
			/* Corpses have no proc; use the pid recorded in the task. */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* pid (-1 on failure) is copied out unconditionally, even on error. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
817
818 /*
819 *
820 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
821 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
822 *
823 */
/* Active task_for_pid policy; see the legend in the comment above. */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
825
826 /*
827 * Routine: task_for_pid_posix_check
828 * Purpose:
829 * Verify that the current process should be allowed to
830 * get the target process's task port. This is only
831 * permitted if:
832 * - The current process is root
833 * OR all of the following are true:
834 * - The target process's real, effective, and saved uids
835 * are the same as the current proc's euid,
836 * - The target process's group set is a subset of the
837 * calling process's group set, and
838 * - The target process hasn't switched credentials.
839 *
840 * Returns: TRUE: permitted
841 * FALSE: denied
842 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	bool checkcredentials;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred)) {
		return TRUE;
	}

	/* We're allowed to get our own task port */
	if (target == current_proc()) {
		return TRUE;
	}

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* Holds a reference on targetcred; released via the "out" path below. */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Third-party debuggable drivers are exempt from the uid/gid checks. */
	checkcredentials = !proc_is_third_party_debuggable_driver(target);

	if (checkcredentials) {
		/* Do target's ruid, euid, and saved uid match my euid? */
		if ((kauth_cred_getuid(targetcred) != myuid) ||
		    (kauth_cred_getruid(targetcred) != myuid) ||
		    (kauth_cred_getsvuid(targetcred) != myuid)) {
			allowed = FALSE;
			goto out;
		}
		/* Are target's groups a subset of my groups? */
		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
		    allowed == 0) {
			allowed = FALSE;
			goto out;
		}
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
908
909 /*
910 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
911 *
912 * Description: Waits for the user space daemon to respond to the request
913 * we made. Function declared non inline to be visible in
914 * stackshots and spindumps as well as debugging.
915 */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	/* Blocking upcall to the task access server; noinline so the wait
	 * shows up under this name in stackshots and spindumps. */
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
922
923 /*
924 * Routine: task_for_pid
925 * Purpose:
926 * Get the task port for another "process", named by its
927 * process ID on the same host as "target_task".
928 *
929 * Only permitted to privileged processes, or processes
930 * with the same user ID.
931 *
932 * Note: if pid == 0, an error is return no matter who is calling.
933 *
934 * XXX This should be a BSD system call, not a Mach trap!!!
935 */
/*
 * task_for_pid: return to the caller a send right to the full CONTROL
 * port of the task whose process id is args->pid.  The resulting port
 * name is copied out to the user address args->t (MACH_PORT_NULL is
 * copied out on any failure path).
 *
 * Access is gated by task_for_pid_posix_check(), the MAC framework
 * (mac_proc_check_get_task) and, for non-root callers targeting a
 * process other than themselves, an upcall to the task access server.
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_t task = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	void * sright = NULL;
	int error = 0;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0: the kernel task is never handed out. */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* The caller must present a valid task port of its own. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture a stable identity now; the proc ref is dropped before upcalls. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* A proc without a task yields success but MACH_PORT_NULL. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held across upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (task_is_a_corpse(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: copy out the result (or MACH_PORT_NULL), drop all refs. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1083
1084 /*
1085 * Routine: task_name_for_pid
1086 * Purpose:
1087 * Get the task name port for another "process", named by its
1088 * process ID on the same host as "target_task".
1089 *
1090 * Only permitted to privileged processes, or processes
1091 * with the same user ID.
1092 *
1093 * XXX This should be a BSD system call, not a Mach trap!!!
1094 */
1095
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* The caller must present a valid task port of its own. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Permitted when the target is not a zombie and the caller is
		 * the target itself, the superuser, matches both the target's
		 * effective and real uids, or holds the name-port entitlement.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
		    )) {
			if (proc_task(p) != TASK_NULL) {
				struct proc_ident pident = proc_ident(p);

				task_t task = proc_task(p);

				/* Hold the task; the proc ref must be dropped before the MACF call. */
				task_reference(task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* conversion consumes the task reference taken above */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Failure path: report MACH_PORT_NULL to the caller. */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1182
1183 /*
1184 * Routine: task_inspect_for_pid
1185 * Purpose:
1186 * Get the task inspect port for another "process", named by its
1187 * process ID on the same host as "target_task".
1188 */
/*
 * Returns 0 on success or a BSD errno (also stored in *ret); the port
 * name (or MACH_PORT_NULL on failure) is copied out to args->t.
 */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;

	proc_t proc = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_inspect_t task_insp = TASK_INSPECT_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	int error = 0;
	void *sright = NULL;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	/* The caller must present a valid task port of its own. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Capture a stable identity now; the proc ref is dropped before upcalls. */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	/* A proc without a task yields success but MACH_PORT_NULL. */
	task_insp = proc_task(proc);
	if (task_insp == TASK_INSPECT_NULL) {
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	/* Common exit: copy out the result (or MACH_PORT_NULL), drop all refs. */
	task_deallocate(t1);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1304
1305 /*
1306 * Routine: task_read_for_pid
1307 * Purpose:
1308 * Get the task read port for another "process", named by its
1309 * process ID on the same host as "target_task".
1310 */
1311 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1312 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1313 {
1314 mach_port_name_t target_tport = args->target_tport;
1315 int pid = args->pid;
1316 user_addr_t task_addr = args->t;
1317
1318 proc_t proc = PROC_NULL;
1319 task_t t1 = TASK_NULL;
1320 task_read_t task_read = TASK_READ_NULL;
1321 mach_port_name_t tret = MACH_PORT_NULL;
1322 ipc_port_t tfpport = MACH_PORT_NULL;
1323 int error = 0;
1324 void *sright = NULL;
1325 boolean_t is_current_proc = FALSE;
1326 struct proc_ident pident = {0};
1327
1328 /* Disallow read port for kernel_task */
1329 if (pid == 0) {
1330 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1331 return EPERM;
1332 }
1333
1334 t1 = port_name_to_task(target_tport);
1335 if (t1 == TASK_NULL) {
1336 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1337 return EINVAL;
1338 }
1339
1340 proc = proc_find(pid);
1341 if (proc == PROC_NULL) {
1342 error = ESRCH;
1343 goto trfpout;
1344 }
1345 pident = proc_ident(proc);
1346 is_current_proc = (proc == current_proc());
1347
1348 if (!(task_for_pid_posix_check(proc))) {
1349 error = EPERM;
1350 goto trfpout;
1351 }
1352
1353 task_read = proc_task(proc);
1354 if (task_read == TASK_INSPECT_NULL) {
1355 goto trfpout;
1356 }
1357
1358 /*
1359 * Grab a task reference and drop the proc reference before making any upcalls.
1360 */
1361 task_reference(task_read);
1362
1363 proc_rele(proc);
1364 proc = PROC_NULL;
1365
1366 #if CONFIG_MACF
1367 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1368 if (error) {
1369 error = EPERM;
1370 goto trfpout;
1371 }
1372 #endif
1373
1374 /* If we aren't root and target's task access port is set... */
1375 if (!kauth_cred_issuser(kauth_cred_get()) &&
1376 !is_current_proc &&
1377 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1378 (tfpport != IPC_PORT_NULL)) {
1379 if (tfpport == IPC_PORT_DEAD) {
1380 error = EACCES;
1381 goto trfpout;
1382 }
1383
1384
1385 /* Call up to the task access server */
1386 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1387 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1388
1389 if (error != MACH_MSG_SUCCESS) {
1390 if (error == MACH_RCV_INTERRUPTED) {
1391 error = EINTR;
1392 } else {
1393 error = EPERM;
1394 }
1395 goto trfpout;
1396 }
1397 }
1398
1399 /* Check if the task has been corpsified */
1400 if (task_is_a_corpse(task_read)) {
1401 error = EACCES;
1402 goto trfpout;
1403 }
1404
1405 /* could be IP_NULL, consumes a ref */
1406 sright = (void*) convert_task_read_to_port(task_read);
1407 task_read = TASK_READ_NULL;
1408 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1409
1410 trfpout:
1411 task_deallocate(t1);
1412 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1413 if (proc != PROC_NULL) {
1414 proc_rele(proc);
1415 }
1416 if (tfpport != IPC_PORT_NULL) {
1417 ipc_port_release_send(tfpport);
1418 }
1419 if (task_read != TASK_READ_NULL) {
1420 task_deallocate(task_read);
1421 }
1422
1423 *ret = error;
1424 return error;
1425 }
1426
/*
 * pid_suspend: suspend all threads of the task running the process
 * named by args->pid.  Requires task_for_pid_posix_check() to pass or
 * the process suspend/resume entitlement; on macOS the task access
 * server is also consulted for non-root callers targeting another
 * process.  Returns 0 or a BSD errno (also stored in *ret).
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* The kernel (pid 0) can never be suspended. */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		/* Map Mach status to BSD errno. */
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Notify memorystatus that the process is now suspended. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1518
/*
 * debug_control_port_for_pid: return a send right to the debug control
 * port of the task running the process named by args->pid; the port
 * name is copied out to args->t (MACH_PORT_NULL on failure).
 *
 * Holders of DEBUG_PORT_ENTITLEMENT bypass both the MACF check and the
 * task access server upcall.
 */
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_t task = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	ipc_port_t sright = NULL;
	int error = 0;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0: the kernel task is never handed out. */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* The caller must present a valid task port of its own. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture a stable identity now; the proc ref is dropped before upcalls. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* A proc without a task yields success but MACH_PORT_NULL. */
	if (proc_task(p) == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = proc_task(p);
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (task_is_a_corpse(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: copy out the result (or MACH_PORT_NULL), drop all refs. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1653
/*
 * pid_resume: resume a task previously suspended via pid_suspend().
 * Access checks mirror pid_suspend().  Returns 0 or a BSD errno
 * (also stored in *ret).
 */
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* The kernel (pid 0) can never be resumed. */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = proc_task(targetproc);
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* Bring the process's sockets back up before resuming it. */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		/* Map Mach status to BSD errno. */
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				/* KERN_MEMORY_ERROR: kill the target and report EIO */
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1757
#if !XNU_TARGET_OS_OSX
/*
 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
 * When a process is specified, this call is blocking, otherwise we wake up the
 * freezer thread and do not block on a process being frozen.
 *
 * Returns 0 or a BSD errno (also stored in *ret).  Without CONFIG_FREEZE
 * this is a no-op that returns 0.
 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;

#ifndef CONFIG_FREEZE
#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	//Note that targetproc may be null
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * Negative pids select freezer-wide actions:
	 *   -2: page out anonymous memory system-wide
	 *   -1: wake the freezer thread (non-blocking)
	 * otherwise: synchronously freeze the named process.
	 */
	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
#endif /* !XNU_TARGET_OS_OSX */
1821
1822 #if SOCKETS
/*
 * networking_memstatus_callout: notify networking-related file
 * descriptors (NECP fds and skywalk channels) owned by process `p`
 * of memorystatus change `status`.  Always returns 1.
 */
int
networking_memstatus_callout(proc_t p, uint32_t status)
{
	struct fileproc *fp;

	/*
	 * proc list lock NOT held
	 * proc lock NOT held
	 * a reference on the proc has been held / shall be dropped by the caller.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);

	proc_fdlock(p);

	/* Walk the fd table under the fd lock, dispatching by descriptor type. */
	fdt_foreach(fp, p) {
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
#if NECP
		case DTYPE_NETPOLICY:
			necp_fd_memstatus(p, status,
			    (struct necp_fd_data *)fp_get_data(fp));
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			kern_channel_memstatus(p, status,
			    (struct kern_channel *)fp_get_data(fp));
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}
	proc_fdunlock(p);

	return 1;
}
1860
1861 #if SKYWALK
1862 /*
1863 * Since we make multiple passes across the fileproc array, record the
1864 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases. If we find more,
1866 * we'll go to the slow path during second pass.
1867 */
1868 #define MAX_CHANNELS 8 /* should be more than enough */
1869 #endif /* SKYWALK */
1870
/*
 * networking_defunct_callout: proc_iterate() callback for
 * pid_shutdown_sockets().  Defuncts the sockets, NECP fds and skywalk
 * channels associated with the pid in `arg` (a pid_shutdown_sockets_args).
 * Always returns PROC_RETURNED.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	/* First pass over the fd table: sockets and NECP now, channels recorded. */
	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match on owner pid, last-use pid, or delegated pid. */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP). Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* Slow path: more channels than the array holds; re-walk the table. */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1955
/*
 * pid_shutdown_sockets: defunct the networking descriptors associated
 * with process args->pid at the requested disconnect level.  Access is
 * gated by task_for_pid_posix_check() or the suspend/resume entitlement
 * and, when configured, MACF.  Returns 0 or a BSD errno (also in *ret).
 */
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int level = args->level;

	/* Only the two supported disconnect levels are accepted. */
	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/* Walk all processes; the callback matches sockets against the pid. */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
	    networking_defunct_callout, args, NULL, NULL);

out:
	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
2000
2001 #endif /* SOCKETS */
2002
2003 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)2004 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
2005 __unused int arg2, struct sysctl_req *req)
2006 {
2007 int error = 0;
2008 int new_value;
2009
2010 error = SYSCTL_OUT(req, arg1, sizeof(int));
2011 if (error || req->newptr == USER_ADDR_NULL) {
2012 return error;
2013 }
2014
2015 if (!kauth_cred_issuser(kauth_cred_get())) {
2016 return EPERM;
2017 }
2018
2019 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2020 goto out;
2021 }
2022 if ((new_value == KERN_TFP_POLICY_DENY)
2023 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2024 tfp_policy = new_value;
2025 } else {
2026 error = EINVAL;
2027 }
2028 out:
2029 return error;
2030 }
2031
/* kern.secure_kernel: read-only flag, 1 iff built with SECURE_KERNEL. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy knob, validated by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: shared-region tracing and behavior knobs. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2050
2051 /*
2052 * shared_region_check_np:
2053 *
2054 * This system call is intended for dyld.
2055 *
2056 * dyld calls this when any process starts to see if the process's shared
2057 * region is already set up and ready to use.
2058 * This call returns the base address of the first mapping in the
2059 * process's shared region's first mapping.
2060 * dyld will then check what's mapped at that address.
2061 *
2062 * If the shared region is empty, dyld will then attempt to map the shared
2063 * cache file in the shared region via the shared_region_map_np() system call.
2064 *
2065 * If something's already mapped in the shared region, dyld will check if it
2066 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2068 * shared region.
2069 * If it doesn't match, dyld will unmap the shared region and map the shared
2070 * cache into the process's address space via mmap().
2071 *
2072 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2073 * the shared region. We will remove the shared_region reference from the task.
2074 *
2075 * ERROR VALUES
2076 * EINVAL no shared region
2077 * ENOMEM shared region is empty
2078 * EFAULT bad address for "start_address"
2079 */
int
shared_region_check_np(
	__unused struct proc *p,
	struct shared_region_check_np_args *uap,
	__unused int *retvalp)
{
	vm_shared_region_t shared_region;
	mach_vm_offset_t start_address = 0;
	int error = 0;
	kern_return_t kr;
	task_t task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/* retrieve the current tasks's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		/* drop the ref taken by vm_shared_region_get() */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2178
2179
2180 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2181 shared_region_copyin(
2182 struct proc *p,
2183 user_addr_t user_addr,
2184 unsigned int count,
2185 unsigned int element_size,
2186 void *kernel_data)
2187 {
2188 int error = 0;
2189 vm_size_t size = count * element_size;
2190
2191 error = copyin(user_addr, kernel_data, size);
2192 if (error) {
2193 SHARED_REGION_TRACE_ERROR(
2194 ("shared_region: %p [%d(%s)] map(): "
2195 "copyin(0x%llx, %ld) failed (error=%d)\n",
2196 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2197 proc_getpid(p), p->p_comm,
2198 (uint64_t)user_addr, (long)size, error));
2199 }
2200 return error;
2201 }
2202
2203 /*
2204 * A reasonable upper limit to prevent overflow of allocation/copyin.
2205 */
2206 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2207
2208 /* forward declaration */
2209 __attribute__((noinline))
2210 static void shared_region_map_and_slide_cleanup(
2211 struct proc *p,
2212 uint32_t files_count,
2213 struct _sr_file_mappings *sr_file_mappings,
2214 struct vm_shared_region *shared_region);
2215
2216 /*
2217 * Setup part of _shared_region_map_and_slide().
2218 * It had to be broken out of _shared_region_map_and_slide() to
2219 * prevent compiler inlining from blowing out the stack.
2220 */
2221 __attribute__((noinline))
2222 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode * rdir_vp)2223 shared_region_map_and_slide_setup(
2224 struct proc *p,
2225 uint32_t files_count,
2226 struct shared_file_np *files,
2227 uint32_t mappings_count,
2228 struct shared_file_mapping_slide_np *mappings,
2229 struct _sr_file_mappings **sr_file_mappings,
2230 struct vm_shared_region **shared_region_ptr,
2231 struct vnode *rdir_vp)
2232 {
2233 int error = 0;
2234 struct _sr_file_mappings *srfmp;
2235 uint32_t mappings_next;
2236 struct vnode_attr va;
2237 off_t fs;
2238 #if CONFIG_MACF
2239 vm_prot_t maxprot = VM_PROT_ALL;
2240 #endif
2241 uint32_t i;
2242 struct vm_shared_region *shared_region = NULL;
2243 boolean_t is_driverkit = task_is_driver(current_task());
2244
2245 SHARED_REGION_TRACE_DEBUG(
2246 ("shared_region: %p [%d(%s)] -> map\n",
2247 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2248 proc_getpid(p), p->p_comm));
2249
2250 if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2251 error = E2BIG;
2252 goto done;
2253 }
2254 if (files_count == 0) {
2255 error = EINVAL;
2256 goto done;
2257 }
2258 *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2259 Z_WAITOK | Z_ZERO);
2260 if (*sr_file_mappings == NULL) {
2261 error = ENOMEM;
2262 goto done;
2263 }
2264 mappings_next = 0;
2265 for (i = 0; i < files_count; i++) {
2266 srfmp = &(*sr_file_mappings)[i];
2267 srfmp->fd = files[i].sf_fd;
2268 srfmp->mappings_count = files[i].sf_mappings_count;
2269 srfmp->mappings = &mappings[mappings_next];
2270 mappings_next += srfmp->mappings_count;
2271 if (mappings_next > mappings_count) {
2272 error = EINVAL;
2273 goto done;
2274 }
2275 srfmp->slide = files[i].sf_slide;
2276 }
2277
2278 /* get the process's shared region (setup in vm_map_exec()) */
2279 shared_region = vm_shared_region_trim_and_get(current_task());
2280 *shared_region_ptr = shared_region;
2281 if (shared_region == NULL) {
2282 SHARED_REGION_TRACE_ERROR(
2283 ("shared_region: %p [%d(%s)] map(): "
2284 "no shared region\n",
2285 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2286 proc_getpid(p), p->p_comm));
2287 error = EINVAL;
2288 goto done;
2289 }
2290
2291 /*
2292 * Check the shared region matches the current root
2293 * directory of this process. Deny the mapping to
2294 * avoid tainting the shared region with something that
2295 * doesn't quite belong into it.
2296 */
2297 struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2298 if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2299 SHARED_REGION_TRACE_ERROR(
2300 ("shared_region: map(%p) root_dir mismatch\n",
2301 (void *)VM_KERNEL_ADDRPERM(current_thread())));
2302 error = EPERM;
2303 goto done;
2304 }
2305
2306
2307 for (srfmp = &(*sr_file_mappings)[0];
2308 srfmp < &(*sr_file_mappings)[files_count];
2309 srfmp++) {
2310 if (srfmp->mappings_count == 0) {
2311 /* no mappings here... */
2312 continue;
2313 }
2314
2315 /*
2316 * A file descriptor of -1 is used to indicate that the data
2317 * to be put in the shared region for this mapping comes directly
2318 * from the processes address space. Ensure we have proper alignments.
2319 */
2320 if (srfmp->fd == -1) {
2321 /* only allow one mapping per fd */
2322 if (srfmp->mappings_count > 1) {
2323 SHARED_REGION_TRACE_ERROR(
2324 ("shared_region: %p [%d(%s)] map data >1 mapping\n",
2325 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2326 proc_getpid(p), p->p_comm));
2327 error = EINVAL;
2328 goto done;
2329 }
2330
2331 /*
2332 * The destination address and size must be page aligned.
2333 */
2334 struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2335 mach_vm_address_t dest_addr = mapping->sms_address;
2336 mach_vm_size_t map_size = mapping->sms_size;
2337 if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2338 SHARED_REGION_TRACE_ERROR(
2339 ("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2340 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2341 proc_getpid(p), p->p_comm, dest_addr));
2342 error = EINVAL;
2343 goto done;
2344 }
2345 if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2346 SHARED_REGION_TRACE_ERROR(
2347 ("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2348 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2349 proc_getpid(p), p->p_comm, map_size));
2350 error = EINVAL;
2351 goto done;
2352 }
2353 continue;
2354 }
2355
2356 /* get file structure from file descriptor */
2357 error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2358 if (error) {
2359 SHARED_REGION_TRACE_ERROR(
2360 ("shared_region: %p [%d(%s)] map: "
2361 "fd=%d lookup failed (error=%d)\n",
2362 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2363 proc_getpid(p), p->p_comm, srfmp->fd, error));
2364 goto done;
2365 }
2366
2367 /* we need at least read permission on the file */
2368 if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2369 SHARED_REGION_TRACE_ERROR(
2370 ("shared_region: %p [%d(%s)] map: "
2371 "fd=%d not readable\n",
2372 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2373 proc_getpid(p), p->p_comm, srfmp->fd));
2374 error = EPERM;
2375 goto done;
2376 }
2377
2378 /* get vnode from file structure */
2379 error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2380 if (error) {
2381 SHARED_REGION_TRACE_ERROR(
2382 ("shared_region: %p [%d(%s)] map: "
2383 "fd=%d getwithref failed (error=%d)\n",
2384 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2385 proc_getpid(p), p->p_comm, srfmp->fd, error));
2386 goto done;
2387 }
2388 srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2389
2390 /* make sure the vnode is a regular file */
2391 if (srfmp->vp->v_type != VREG) {
2392 SHARED_REGION_TRACE_ERROR(
2393 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2394 "not a file (type=%d)\n",
2395 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2396 proc_getpid(p), p->p_comm,
2397 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2398 srfmp->vp->v_name, srfmp->vp->v_type));
2399 error = EINVAL;
2400 goto done;
2401 }
2402
2403 #if CONFIG_MACF
2404 /* pass in 0 for the offset argument because AMFI does not need the offset
2405 * of the shared cache */
2406 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2407 srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2408 if (error) {
2409 goto done;
2410 }
2411 #endif /* MAC */
2412
2413 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2414 /*
2415 * Check if the shared cache is in the trust cache;
2416 * if so, we can skip the root ownership check.
2417 */
2418 #if DEVELOPMENT || DEBUG
2419 /*
2420 * Skip both root ownership and trust cache check if
2421 * enforcement is disabled.
2422 */
2423 if (!cs_system_enforcement()) {
2424 goto after_root_check;
2425 }
2426 #endif /* DEVELOPMENT || DEBUG */
2427 struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2428 if (blob == NULL) {
2429 SHARED_REGION_TRACE_ERROR(
2430 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2431 "missing CS blob\n",
2432 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2433 proc_getpid(p), p->p_comm,
2434 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2435 srfmp->vp->v_name));
2436 goto root_check;
2437 }
2438 const uint8_t *cdhash = csblob_get_cdhash(blob);
2439 if (cdhash == NULL) {
2440 SHARED_REGION_TRACE_ERROR(
2441 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2442 "missing cdhash\n",
2443 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2444 proc_getpid(p), p->p_comm,
2445 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2446 srfmp->vp->v_name));
2447 goto root_check;
2448 }
2449
2450 bool in_trust_cache = false;
2451 TrustCacheQueryToken_t qt;
2452 if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2453 TCType_t tc_type = kTCTypeInvalid;
2454 TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2455 in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2456 (tc_type == kTCTypeCryptex1BootOS ||
2457 tc_type == kTCTypeStatic ||
2458 tc_type == kTCTypeEngineering));
2459 }
2460 if (!in_trust_cache) {
2461 SHARED_REGION_TRACE_ERROR(
2462 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2463 "not in trust cache\n",
2464 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2465 proc_getpid(p), p->p_comm,
2466 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2467 srfmp->vp->v_name));
2468 goto root_check;
2469 }
2470 goto after_root_check;
2471 root_check:
2472 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2473
2474 /* The shared cache file must be owned by root */
2475 VATTR_INIT(&va);
2476 VATTR_WANTED(&va, va_uid);
2477 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2478 if (error) {
2479 SHARED_REGION_TRACE_ERROR(
2480 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2481 "vnode_getattr(%p) failed (error=%d)\n",
2482 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2483 proc_getpid(p), p->p_comm,
2484 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2485 srfmp->vp->v_name,
2486 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2487 error));
2488 goto done;
2489 }
2490 if (va.va_uid != 0) {
2491 SHARED_REGION_TRACE_ERROR(
2492 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2493 "owned by uid=%d instead of 0\n",
2494 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2495 proc_getpid(p), p->p_comm,
2496 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2497 srfmp->vp->v_name, va.va_uid));
2498 error = EPERM;
2499 goto done;
2500 }
2501
2502 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2503 after_root_check:
2504 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2505
2506 #if CONFIG_CSR
2507 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2508 VATTR_INIT(&va);
2509 VATTR_WANTED(&va, va_flags);
2510 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2511 if (error) {
2512 SHARED_REGION_TRACE_ERROR(
2513 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2514 "vnode_getattr(%p) failed (error=%d)\n",
2515 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2516 proc_getpid(p), p->p_comm,
2517 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2518 srfmp->vp->v_name,
2519 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2520 error));
2521 goto done;
2522 }
2523
2524 if (!(va.va_flags & SF_RESTRICTED)) {
2525 /*
2526 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2527 * the shared cache file is NOT SIP-protected, so reject the
2528 * mapping request
2529 */
2530 SHARED_REGION_TRACE_ERROR(
2531 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
2532 "vnode is not SIP-protected. \n",
2533 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2534 proc_getpid(p), p->p_comm,
2535 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2536 srfmp->vp->v_name));
2537 error = EPERM;
2538 goto done;
2539 }
2540 }
2541 #else /* CONFIG_CSR */
2542
2543 /*
2544 * Devices without SIP/ROSP need to make sure that the shared cache
2545 * is either on the root volume or in the preboot cryptex volume.
2546 */
2547 assert(rdir_vp != NULL);
2548 if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2549 vnode_t preboot_vp = NULL;
2550 #if XNU_TARGET_OS_OSX
2551 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2552 #else
2553 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2554 #endif
2555 error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2556 if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2557 SHARED_REGION_TRACE_ERROR(
2558 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2559 "not on process' root volume nor preboot volume\n",
2560 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2561 proc_getpid(p), p->p_comm,
2562 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2563 srfmp->vp->v_name));
2564 error = EPERM;
2565 if (preboot_vp) {
2566 (void)vnode_put(preboot_vp);
2567 }
2568 goto done;
2569 } else if (preboot_vp) {
2570 (void)vnode_put(preboot_vp);
2571 }
2572 }
2573 #endif /* CONFIG_CSR */
2574
2575 if (scdir_enforce) {
2576 char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2577 struct vnode *scdir_vp = NULL;
2578 for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2579 *expected_scdir_path != NULL;
2580 expected_scdir_path++) {
2581 /* get vnode for expected_scdir_path */
2582 error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2583 if (error) {
2584 SHARED_REGION_TRACE_ERROR(
2585 ("shared_region: %p [%d(%s)]: "
2586 "vnode_lookup(%s) failed (error=%d)\n",
2587 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2588 proc_getpid(p), p->p_comm,
2589 *expected_scdir_path, error));
2590 continue;
2591 }
2592
2593 /* check if parent is scdir_vp */
2594 assert(scdir_vp != NULL);
2595 if (vnode_parent(srfmp->vp) == scdir_vp) {
2596 (void)vnode_put(scdir_vp);
2597 scdir_vp = NULL;
2598 goto scdir_ok;
2599 }
2600 (void)vnode_put(scdir_vp);
2601 scdir_vp = NULL;
2602 }
2603 /* nothing matches */
2604 SHARED_REGION_TRACE_ERROR(
2605 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2606 "shared cache file not in expected directory\n",
2607 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2608 proc_getpid(p), p->p_comm,
2609 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2610 srfmp->vp->v_name));
2611 error = EPERM;
2612 goto done;
2613 }
2614 scdir_ok:
2615
2616 /* get vnode size */
2617 error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2618 if (error) {
2619 SHARED_REGION_TRACE_ERROR(
2620 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2621 "vnode_size(%p) failed (error=%d)\n",
2622 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2623 proc_getpid(p), p->p_comm,
2624 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2625 srfmp->vp->v_name,
2626 (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2627 goto done;
2628 }
2629 srfmp->file_size = fs;
2630
2631 /* get the file's memory object handle */
2632 srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2633 if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2634 SHARED_REGION_TRACE_ERROR(
2635 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2636 "no memory object\n",
2637 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2638 proc_getpid(p), p->p_comm,
2639 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2640 srfmp->vp->v_name));
2641 error = EINVAL;
2642 goto done;
2643 }
2644
2645 /* check that the mappings are properly covered by code signatures */
2646 if (!cs_system_enforcement()) {
2647 /* code signing is not enforced: no need to check */
2648 } else {
2649 for (i = 0; i < srfmp->mappings_count; i++) {
2650 if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2651 /* zero-filled mapping: not backed by the file */
2652 continue;
2653 }
2654 if (ubc_cs_is_range_codesigned(srfmp->vp,
2655 srfmp->mappings[i].sms_file_offset,
2656 srfmp->mappings[i].sms_size)) {
2657 /* this mapping is fully covered by code signatures */
2658 continue;
2659 }
2660 SHARED_REGION_TRACE_ERROR(
2661 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2662 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2663 "is not code-signed\n",
2664 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2665 proc_getpid(p), p->p_comm,
2666 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2667 srfmp->vp->v_name,
2668 i, srfmp->mappings_count,
2669 srfmp->mappings[i].sms_address,
2670 srfmp->mappings[i].sms_size,
2671 srfmp->mappings[i].sms_file_offset,
2672 srfmp->mappings[i].sms_max_prot,
2673 srfmp->mappings[i].sms_init_prot));
2674 error = EINVAL;
2675 goto done;
2676 }
2677 }
2678 }
2679 done:
2680 if (error != 0) {
2681 shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2682 *sr_file_mappings = NULL;
2683 *shared_region_ptr = NULL;
2684 }
2685 return error;
2686 }
2687
2688 /*
2689 * shared_region_map_np()
2690 *
2691 * This system call is intended for dyld.
2692 *
2693 * dyld uses this to map a shared cache file into a shared region.
2694 * This is usually done only the first time a shared cache is needed.
2695 * Subsequent processes will just use the populated shared region without
2696 * requiring any further setup.
2697 */
2698 static int
_shared_region_map_and_slide(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings)2699 _shared_region_map_and_slide(
2700 struct proc *p,
2701 uint32_t files_count,
2702 struct shared_file_np *files,
2703 uint32_t mappings_count,
2704 struct shared_file_mapping_slide_np *mappings)
2705 {
2706 int error = 0;
2707 kern_return_t kr = KERN_SUCCESS;
2708 struct _sr_file_mappings *sr_file_mappings = NULL;
2709 struct vnode *rdir_vp = NULL;
2710 struct vm_shared_region *shared_region = NULL;
2711
2712 /*
2713 * Get a reference to the current proc's root dir.
2714 * Need this to prevent racing with chroot.
2715 */
2716 proc_fdlock(p);
2717 rdir_vp = p->p_fd.fd_rdir;
2718 if (rdir_vp == NULL) {
2719 rdir_vp = rootvnode;
2720 }
2721 assert(rdir_vp != NULL);
2722 vnode_get(rdir_vp);
2723 proc_fdunlock(p);
2724
2725 /*
2726 * Turn files, mappings into sr_file_mappings and other setup.
2727 */
2728 error = shared_region_map_and_slide_setup(p, files_count,
2729 files, mappings_count, mappings,
2730 &sr_file_mappings, &shared_region, rdir_vp);
2731 if (error != 0) {
2732 vnode_put(rdir_vp);
2733 return error;
2734 }
2735
2736 /* map the file(s) into that shared region's submap */
2737 kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2738 if (kr != KERN_SUCCESS) {
2739 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2740 "vm_shared_region_map_file() failed kr=0x%x\n",
2741 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2742 proc_getpid(p), p->p_comm, kr));
2743 }
2744
2745 /* convert kern_return_t to errno */
2746 switch (kr) {
2747 case KERN_SUCCESS:
2748 error = 0;
2749 break;
2750 case KERN_INVALID_ADDRESS:
2751 error = EFAULT;
2752 break;
2753 case KERN_PROTECTION_FAILURE:
2754 error = EPERM;
2755 break;
2756 case KERN_NO_SPACE:
2757 error = ENOMEM;
2758 break;
2759 case KERN_FAILURE:
2760 case KERN_INVALID_ARGUMENT:
2761 default:
2762 error = EINVAL;
2763 break;
2764 }
2765
2766 /*
2767 * Mark that this process is now using split libraries.
2768 */
2769 if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2770 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2771 }
2772
2773 vnode_put(rdir_vp);
2774 shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
2775
2776 SHARED_REGION_TRACE_DEBUG(
2777 ("shared_region: %p [%d(%s)] <- map\n",
2778 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2779 proc_getpid(p), p->p_comm));
2780
2781 return error;
2782 }
2783
2784 /*
2785 * Clean up part of _shared_region_map_and_slide()
2786 * It had to be broken out of _shared_region_map_and_slide() to
2787 * prevent compiler inlining from blowing out the stack.
2788 */
2789 __attribute__((noinline))
2790 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region)2791 shared_region_map_and_slide_cleanup(
2792 struct proc *p,
2793 uint32_t files_count,
2794 struct _sr_file_mappings *sr_file_mappings,
2795 struct vm_shared_region *shared_region)
2796 {
2797 struct _sr_file_mappings *srfmp;
2798 struct vnode_attr va;
2799
2800 if (sr_file_mappings != NULL) {
2801 for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2802 if (srfmp->vp != NULL) {
2803 vnode_lock_spin(srfmp->vp);
2804 srfmp->vp->v_flag |= VSHARED_DYLD;
2805 vnode_unlock(srfmp->vp);
2806
2807 /* update the vnode's access time */
2808 if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2809 VATTR_INIT(&va);
2810 nanotime(&va.va_access_time);
2811 VATTR_SET_ACTIVE(&va, va_access_time);
2812 vnode_setattr(srfmp->vp, &va, vfs_context_current());
2813 }
2814
2815 #if NAMEDSTREAMS
2816 /*
2817 * If the shared cache is compressed, it may
2818 * have a namedstream vnode instantiated for
2819 * for it. That namedstream vnode will also
2820 * have to be marked with VSHARED_DYLD.
2821 */
2822 if (vnode_hasnamedstreams(srfmp->vp)) {
2823 vnode_t svp;
2824 if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2825 NS_OPEN, 0, vfs_context_kernel()) == 0) {
2826 vnode_lock_spin(svp);
2827 svp->v_flag |= VSHARED_DYLD;
2828 vnode_unlock(svp);
2829 vnode_put(svp);
2830 }
2831 }
2832 #endif /* NAMEDSTREAMS */
2833 /*
2834 * release the vnode...
2835 * ubc_map() still holds it for us in the non-error case
2836 */
2837 (void) vnode_put(srfmp->vp);
2838 srfmp->vp = NULL;
2839 }
2840 if (srfmp->fp != NULL) {
2841 /* release the file descriptor */
2842 fp_drop(p, srfmp->fd, srfmp->fp, 0);
2843 srfmp->fp = NULL;
2844 }
2845 }
2846 kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2847 }
2848
2849 if (shared_region != NULL) {
2850 vm_shared_region_deallocate(shared_region);
2851 }
2852 }
2853
2854
2855 /*
2856 * For each file mapped, we may have mappings for:
2857 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2858 * so let's round up to 8 mappings per file.
2859 */
2860 #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
2861
2862 /*
2863 * This is the new interface for setting up shared region mappings.
2864 *
2865 * The slide used for shared regions setup using this interface is done differently
2866 * from the old interface. The slide value passed in the shared_files_np represents
2867 * a max value. The kernel will choose a random value based on that, then use it
2868 * for all shared regions.
2869 */
2870 #if defined (__x86_64__)
2871 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2872 #else
2873 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2874 #endif
2875
/*
 * Syscall entry point: validate the caller's counts, copy in the file and
 * mapping arrays, pick a single random slide (bounded by files[0].sf_slide)
 * that is applied to every file, then hand off to
 * _shared_region_map_and_slide().
 *
 * NOTE(review): "kr" holds a mix of kern_return_t values and errnos
 * (shared_region_copyin() returns an errno, and so does
 * _shared_region_map_and_slide()), and is returned directly as the
 * syscall's error.  KERN_FAILURE/KERN_RESOURCE_SHORTAGE therefore surface
 * as unusual errno values -- longstanding behavior, change with care.
 */
int
shared_region_map_and_slide_2_np(
	struct proc *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int *retvalp)
{
	unsigned int files_count;
	struct shared_file_np *shared_files = NULL;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* bound the counts before allocating; zero counts are a successful no-op */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0;         /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0;         /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * The first file's sf_slide is the maximum slide; the kernel chooses
	 * one random, page-mask-aligned value below it and uses it for all
	 * files (per-file sf_slide values are overwritten below).
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	/* honor the ASLR-disable boot-arg on dev/debug kernels */
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 * The bound check on "m" also rejects per-file mapping counts whose
	 * sum exceeds mappings_count.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* presumably kfree_data() tolerates NULL pointers on the early-exit paths -- verify */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2999
3000 /*
3001 * A syscall for dyld to use to map data pages that need load time relocation fixups.
3002 * The fixups are performed by a custom pager during page-in, so the pages still appear
3003 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
3004 * on demand later, all w/o using the compressor.
3005 *
 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
3007 * running, they are COW'd as normal.
3008 */
3009 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)3010 map_with_linking_np(
3011 struct proc *p,
3012 struct map_with_linking_np_args *uap,
3013 __unused int *retvalp)
3014 {
3015 uint32_t region_count;
3016 uint32_t r;
3017 struct mwl_region *regions = NULL;
3018 struct mwl_region *rp;
3019 uint32_t link_info_size;
3020 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
3021 struct mwl_info_hdr *info_hdr = NULL;
3022 uint64_t binds_size;
3023 int fd;
3024 struct fileproc *fp = NULL;
3025 struct vnode *vp = NULL;
3026 size_t file_size;
3027 off_t fs;
3028 struct vnode_attr va;
3029 memory_object_control_t file_control = NULL;
3030 int error;
3031 kern_return_t kr = KERN_SUCCESS;
3032
3033 /*
3034 * Check if dyld has told us it finished with this call.
3035 */
3036 if (p->p_disallow_map_with_linking) {
3037 printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3038 __func__, proc_getpid(p), p->p_comm);
3039 kr = KERN_FAILURE;
3040 goto done;
3041 }
3042
3043 /*
3044 * First we do some sanity checking on what dyld has passed us.
3045 */
3046 region_count = uap->region_count;
3047 link_info_size = uap->link_info_size;
3048 if (region_count == 0) {
3049 printf("%s: [%d(%s)]: region_count == 0\n",
3050 __func__, proc_getpid(p), p->p_comm);
3051 kr = KERN_FAILURE;
3052 goto done;
3053 }
3054 if (region_count > MWL_MAX_REGION_COUNT) {
3055 printf("%s: [%d(%s)]: region_count too big %d\n",
3056 __func__, proc_getpid(p), p->p_comm, region_count);
3057 kr = KERN_FAILURE;
3058 goto done;
3059 }
3060
3061 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3062 printf("%s: [%d(%s)]: link_info_size too small\n",
3063 __func__, proc_getpid(p), p->p_comm);
3064 kr = KERN_FAILURE;
3065 goto done;
3066 }
3067 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3068 printf("%s: [%d(%s)]: link_info_size too big %d\n",
3069 __func__, proc_getpid(p), p->p_comm, link_info_size);
3070 kr = KERN_FAILURE;
3071 goto done;
3072 }
3073
3074 /*
3075 * Allocate and copyin the regions and link info
3076 */
3077 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3078 if (regions == NULL) {
3079 printf("%s: [%d(%s)]: failed to allocate regions\n",
3080 __func__, proc_getpid(p), p->p_comm);
3081 kr = KERN_RESOURCE_SHORTAGE;
3082 goto done;
3083 }
3084 kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3085 if (kr != KERN_SUCCESS) {
3086 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3087 __func__, proc_getpid(p), p->p_comm, kr);
3088 goto done;
3089 }
3090
3091 link_info = kalloc_data(link_info_size, Z_WAITOK);
3092 if (link_info == NULL) {
3093 printf("%s: [%d(%s)]: failed to allocate link_info\n",
3094 __func__, proc_getpid(p), p->p_comm);
3095 kr = KERN_RESOURCE_SHORTAGE;
3096 goto done;
3097 }
3098 kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3099 if (kr != KERN_SUCCESS) {
3100 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3101 __func__, proc_getpid(p), p->p_comm, kr);
3102 goto done;
3103 }
3104
3105 /*
3106 * Do some verification the data structures.
3107 */
3108 info_hdr = (struct mwl_info_hdr *)link_info;
3109 if (info_hdr->mwli_version != MWL_INFO_VERS) {
3110 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3111 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3112 kr = KERN_FAILURE;
3113 goto done;
3114 }
3115
3116 if (info_hdr->mwli_binds_offset > link_info_size) {
3117 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3118 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3119 kr = KERN_FAILURE;
3120 goto done;
3121 }
3122
3123 /* some older devs have s/w page size > h/w page size, no need to support them */
3124 if (info_hdr->mwli_page_size != PAGE_SIZE) {
3125 /* no printf, since this is expected on some devices */
3126 kr = KERN_INVALID_ARGUMENT;
3127 goto done;
3128 }
3129
3130 binds_size = (uint64_t)info_hdr->mwli_binds_count *
3131 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3132 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3133 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3134 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3135 kr = KERN_FAILURE;
3136 goto done;
3137 }
3138
3139 if (info_hdr->mwli_chains_offset > link_info_size) {
3140 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3141 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3142 kr = KERN_FAILURE;
3143 goto done;
3144 }
3145
3146
3147 /*
3148 * Ensure the chained starts in the link info and make sure the
3149 * segment info offsets are within bounds.
3150 */
3151 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3152 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3153 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3154 kr = KERN_FAILURE;
3155 goto done;
3156 }
3157 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3158 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3159 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3160 kr = KERN_FAILURE;
3161 goto done;
3162 }
3163
3164 /* Note that more verification of offsets is done in the pager itself */
3165
3166 /*
3167 * Ensure we've only been given one FD and verify valid protections.
3168 */
3169 fd = regions[0].mwlr_fd;
3170 for (r = 0; r < region_count; ++r) {
3171 if (regions[r].mwlr_fd != fd) {
3172 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3173 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3174 kr = KERN_FAILURE;
3175 goto done;
3176 }
3177 regions[r].mwlr_protections &= VM_PROT_ALL;
3178 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3179 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3180 __func__, proc_getpid(p), p->p_comm);
3181 kr = KERN_FAILURE;
3182 goto done;
3183 }
3184 }
3185
3186
3187 /* get file structure from file descriptor */
3188 error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3189 if (error) {
3190 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3191 __func__, proc_getpid(p), p->p_comm, error);
3192 kr = KERN_FAILURE;
3193 goto done;
3194 }
3195
3196 /* We need at least read permission on the file */
3197 if (!(fp->fp_glob->fg_flag & FREAD)) {
3198 printf("%s: [%d(%s)]: not readable\n",
3199 __func__, proc_getpid(p), p->p_comm);
3200 kr = KERN_FAILURE;
3201 goto done;
3202 }
3203
3204 /* Get the vnode from file structure */
3205 vp = (struct vnode *)fp_get_data(fp);
3206 error = vnode_getwithref(vp);
3207 if (error) {
3208 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3209 __func__, proc_getpid(p), p->p_comm, error);
3210 kr = KERN_FAILURE;
3211 vp = NULL; /* just to be sure */
3212 goto done;
3213 }
3214
3215 /* Make sure the vnode is a regular file */
3216 if (vp->v_type != VREG) {
3217 printf("%s: [%d(%s)]: vnode not VREG\n",
3218 __func__, proc_getpid(p), p->p_comm);
3219 kr = KERN_FAILURE;
3220 goto done;
3221 }
3222
3223 /* get vnode size */
3224 error = vnode_size(vp, &fs, vfs_context_current());
3225 if (error) {
3226 goto done;
3227 }
3228 file_size = fs;
3229
3230 /* get the file's memory object handle */
3231 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3232 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3233 printf("%s: [%d(%s)]: no memory object\n",
3234 __func__, proc_getpid(p), p->p_comm);
3235 kr = KERN_FAILURE;
3236 goto done;
3237 }
3238
3239 for (r = 0; r < region_count; ++r) {
3240 rp = ®ions[r];
3241
3242 /*
3243 * Only allow data mappings and not zero fill.
3244 */
3245 if (rp->mwlr_protections & VM_PROT_ZF) {
3246 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF\n",
3247 __func__, proc_getpid(p), p->p_comm, r);
3248 kr = KERN_FAILURE;
3249 goto done;
3250 }
3251 if (rp->mwlr_protections & VM_PROT_EXECUTE) {
3252 printf("%s: [%d(%s)]: region %d, found VM_PROT_EXECUTE\n",
3253 __func__, proc_getpid(p), p->p_comm, r);
3254 kr = KERN_FAILURE;
3255 goto done;
3256 }
3257
3258 #if CONFIG_MACF
3259 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3260 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3261 fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3262 if (error) {
3263 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3264 __func__, proc_getpid(p), p->p_comm, r, error);
3265 kr = KERN_FAILURE;
3266 goto done;
3267 }
3268 #endif /* MAC */
3269
3270 /* check that the mappings are properly covered by code signatures */
3271 if (cs_system_enforcement()) {
3272 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3273 printf("%s: [%d(%s)]: region %d, not code signed\n",
3274 __func__, proc_getpid(p), p->p_comm, r);
3275 kr = KERN_FAILURE;
3276 goto done;
3277 }
3278 }
3279 }
3280
3281 /* update the vnode's access time */
3282 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3283 VATTR_INIT(&va);
3284 nanotime(&va.va_access_time);
3285 VATTR_SET_ACTIVE(&va, va_access_time);
3286 vnode_setattr(vp, &va, vfs_context_current());
3287 }
3288
3289 /* get the VM to do the work */
3290 kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3291
3292 done:
3293 if (fp != NULL) {
3294 /* release the file descriptor */
3295 fp_drop(p, fd, fp, 0);
3296 }
3297 if (vp != NULL) {
3298 (void)vnode_put(vp);
3299 }
3300 if (regions != NULL) {
3301 kfree_data(regions, region_count * sizeof(regions[0]));
3302 }
3303 /* link info is used in the pager if things worked */
3304 if (link_info != NULL && kr != KERN_SUCCESS) {
3305 kfree_data(link_info, link_info_size);
3306 }
3307
3308 switch (kr) {
3309 case KERN_SUCCESS:
3310 return 0;
3311 case KERN_RESOURCE_SHORTAGE:
3312 return ENOMEM;
3313 default:
3314 return EINVAL;
3315 }
3316 }
3317
#if DEBUG || DEVELOPMENT
/* debug-only counters for dyld pagers (current and high-water mark) */
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

/* vm.pagesize: read-only view of the VM page size */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

/* vm.memory_pressure: pageout state's memory-pressure indicator */
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3339
3340 static int
3341 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3342 {
3343 #pragma unused(oidp, arg1, arg2)
3344 unsigned int page_free_wanted;
3345
3346 page_free_wanted = mach_vm_ctl_page_free_wanted();
3347 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3348 }
3349 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
3350 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3351 0, 0, vm_ctl_page_free_wanted, "I", "");
3352
/* read-only counters: purgeable pages and kernel large-page usage */
extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");
3364
#if DEVELOPMENT || DEBUG
/* whether this kernel was built with mixed h/w page-size support */
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

/* page grab/free totals */
SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

/* pageout-daemon debug counters (vm_pageout_debug) */
SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */
3399
/* debug knobs: force (or probabilistically force) zero-fill on MADV_FREE* */
extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");

/* reusable/reuse page accounting, all backed by vm_page_stats_reusable */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3439
3440
/* page-queue population counters */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

/* shared-cache / realtime page protection and reclaim counters */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3476
#if defined (__x86_64__)
/* x86_64-only: free-list clump allocator tunable and statistics */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
/* per-clump-size allocation histogram, indices 1..16 */
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */
3505
#if CONFIG_SECLUDED_MEMORY

/* secluded-memory pool population counters */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

/* secluded-page grab success/failure statistics (vm_page_secluded) */
extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3532
3533 #include <kern/thread.h>
3534 #include <sys/user.h>
3535
3536 void vm_pageout_io_throttle(void);
3537
3538 void
vm_pageout_io_throttle(void)3539 vm_pageout_io_throttle(void)
3540 {
3541 struct uthread *uthread = current_uthread();
3542
3543 /*
3544 * thread is marked as a low priority I/O type
3545 * and the I/O we issued while in this cleaning operation
3546 * collided with normal I/O operations... we'll
3547 * delay in order to mitigate the impact of this
3548 * task on the normal operation of the system
3549 */
3550
3551 if (uthread->uu_lowpri_window) {
3552 throttle_lowpri_io(1);
3553 }
3554 }
3555
3556 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3557 vm_pressure_monitor(
3558 __unused struct proc *p,
3559 struct vm_pressure_monitor_args *uap,
3560 int *retval)
3561 {
3562 kern_return_t kr;
3563 uint32_t pages_reclaimed;
3564 uint32_t pages_wanted;
3565
3566 kr = mach_vm_pressure_monitor(
3567 (boolean_t) uap->wait_for_pressure,
3568 uap->nsecs_monitored,
3569 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3570 &pages_wanted);
3571
3572 switch (kr) {
3573 case KERN_SUCCESS:
3574 break;
3575 case KERN_ABORTED:
3576 return EINTR;
3577 default:
3578 return EINVAL;
3579 }
3580
3581 if (uap->pages_reclaimed) {
3582 if (copyout((void *)&pages_reclaimed,
3583 uap->pages_reclaimed,
3584 sizeof(pages_reclaimed)) != 0) {
3585 return EFAULT;
3586 }
3587 }
3588
3589 *retval = (int) pages_wanted;
3590 return 0;
3591 }
3592
3593 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3594 kas_info(struct proc *p,
3595 struct kas_info_args *uap,
3596 int *retval __unused)
3597 {
3598 #ifndef CONFIG_KAS_INFO
3599 (void)p;
3600 (void)uap;
3601 return ENOTSUP;
3602 #else /* CONFIG_KAS_INFO */
3603 int selector = uap->selector;
3604 user_addr_t valuep = uap->value;
3605 user_addr_t sizep = uap->size;
3606 user_size_t size, rsize;
3607 int error;
3608
3609 if (!kauth_cred_issuser(kauth_cred_get())) {
3610 return EPERM;
3611 }
3612
3613 #if CONFIG_MACF
3614 error = mac_system_check_kas_info(kauth_cred_get(), selector);
3615 if (error) {
3616 return error;
3617 }
3618 #endif
3619
3620 if (IS_64BIT_PROCESS(p)) {
3621 user64_size_t size64;
3622 error = copyin(sizep, &size64, sizeof(size64));
3623 size = (user_size_t)size64;
3624 } else {
3625 user32_size_t size32;
3626 error = copyin(sizep, &size32, sizeof(size32));
3627 size = (user_size_t)size32;
3628 }
3629 if (error) {
3630 return error;
3631 }
3632
3633 switch (selector) {
3634 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3635 {
3636 uint64_t slide = vm_kernel_slide;
3637
3638 if (sizeof(slide) != size) {
3639 return EINVAL;
3640 }
3641
3642 error = copyout(&slide, valuep, sizeof(slide));
3643 if (error) {
3644 return error;
3645 }
3646 rsize = size;
3647 }
3648 break;
3649 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3650 {
3651 uint32_t i;
3652 kernel_mach_header_t *mh = &_mh_execute_header;
3653 struct load_command *cmd;
3654 cmd = (struct load_command*) &mh[1];
3655 uint64_t *bases;
3656 rsize = mh->ncmds * sizeof(uint64_t);
3657
3658 /*
3659 * Return the size if no data was passed
3660 */
3661 if (valuep == 0) {
3662 break;
3663 }
3664
3665 if (rsize > size) {
3666 return EINVAL;
3667 }
3668
3669 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3670
3671 for (i = 0; i < mh->ncmds; i++) {
3672 if (cmd->cmd == LC_SEGMENT_KERNEL) {
3673 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3674 bases[i] = (uint64_t)sg->vmaddr;
3675 }
3676 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3677 }
3678
3679 error = copyout(bases, valuep, rsize);
3680
3681 kfree_data(bases, rsize);
3682
3683 if (error) {
3684 return error;
3685 }
3686 }
3687 break;
3688 default:
3689 return EINVAL;
3690 }
3691
3692 if (IS_64BIT_PROCESS(p)) {
3693 user64_size_t size64 = (user64_size_t)rsize;
3694 error = copyout(&size64, sizep, sizeof(size64));
3695 } else {
3696 user32_size_t size32 = (user32_size_t)rsize;
3697 error = copyout(&size32, sizep, sizeof(size32));
3698 }
3699
3700 return error;
3701 #endif /* CONFIG_KAS_INFO */
3702 }
3703
3704 #if __has_feature(ptrauth_calls)
3705 /*
3706 * Generate a random pointer signing key that isn't 0.
3707 */
3708 uint64_t
generate_jop_key(void)3709 generate_jop_key(void)
3710 {
3711 uint64_t key;
3712
3713 do {
3714 read_random(&key, sizeof key);
3715 } while (key == 0);
3716 return key;
3717 }
3718 #endif /* __has_feature(ptrauth_calls) */
3719
3720
3721 #pragma clang diagnostic push
3722 #pragma clang diagnostic ignored "-Wcast-qual"
3723 #pragma clang diagnostic ignored "-Wunused-function"
3724
3725 static void
asserts()3726 asserts()
3727 {
3728 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3729 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3730 }
3731
/* kernel VA range bounds; the casts are validated by asserts() above */
SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

/* tainted-page counters for UPL and IOPL paths */
extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3747
#if __arm64__ && (DEVELOPMENT || DEBUG)
/* gate for the vm.footprint_suspend handler defined below */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3753 static int
3754 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3755 {
3756 #pragma unused(oidp, arg1, arg2)
3757 int error = 0;
3758 int new_value;
3759
3760 if (req->newptr == USER_ADDR_NULL) {
3761 return 0;
3762 }
3763 error = SYSCTL_IN(req, &new_value, sizeof(int));
3764 if (error) {
3765 return error;
3766 }
3767 if (!vm_footprint_suspend_allowed) {
3768 if (new_value != 0) {
3769 /* suspends are not allowed... */
3770 return 0;
3771 }
3772 /* ... but let resumes proceed */
3773 }
3774 DTRACE_VM2(footprint_suspend,
3775 vm_map_t, current_map(),
3776 int, new_value);
3777
3778 pmap_footprint_suspend(current_map(), new_value);
3779
3780 return 0;
3781 }
3782 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
3783 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3784 0, 0, &sysctl_vm_footprint_suspend, "I", "");
3785 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3786
3787 extern uint64_t vm_map_corpse_footprint_count;
3788 extern uint64_t vm_map_corpse_footprint_size_avg;
3789 extern uint64_t vm_map_corpse_footprint_size_max;
3790 extern uint64_t vm_map_corpse_footprint_full;
3791 extern uint64_t vm_map_corpse_footprint_no_buf;
/*
 * Corpse-footprint statistics: how many footprint buffers were collected,
 * their average and maximum sizes, and how often collection hit the
 * buffer-full or no-buffer conditions.  Counters are defined in the VM map
 * corpse-footprint code; all are read-only.
 */
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");

#if CODE_SIGNING_MONITOR
/*
 * Counters for code-signing validation decisions that were (or were not)
 * deferred to the code-signing monitor.
 */
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */

/*
 * Shared-region pager activity: pages copied vs. slid, slide failures,
 * and pagers reclaimed.  Read-only counters.
 */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
/* Tunable delay before a stale shared region is destroyed — TODO confirm units (likely seconds). */
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
/* Tunable leeway before pmap ledger imbalances trigger a panic (debug builds only). */
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
3832
3833
/*
 * Statistics for vm_map_lookup_and_lock_object()'s copy-on-write handling,
 * broken down by copy strategy: "slowly" (page-by-page copy), "strategically"
 * (strategy chosen by the object's copy strategy), and "shadow" (shadow object
 * creation).  Each strategy reports a count, cumulative size, and max size;
 * slowly/strategically additionally report restarts and errors.  Read-only.
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3873
/* Tunable: whether privileged processes are protected from untrusted mappings. */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
/* Count of pages that were copied as a result of a read fault. */
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Current and peak number of shared regions in existence.  Read-only. */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
/* Development-only visibility into shared-region pager population. */
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* ptrauth targets also track signing-key diversity and re-slide requests. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* Debug knobs for the 4K-page ("debug4k") VM debugging machinery. */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3917
/*
 * Counters for address-space / data-size limit enforcement: how often the
 * limits were set, and how often vm_map_enter rejected an allocation for
 * exceeding RLIMIT_AS or RLIMIT_DATA.  Read-only.
 */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/*
 * Counters tracing the resilient-media fault path (faults on mappings that
 * tolerate media errors): initiations, retries, proceeds, releases, and the
 * two abort exits.  Read-only.
 */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/*
 * Debug-only fault-injection knobs for the resilient-media path: each
 * "*_rate" is a writable injection rate, the matching counter reports how
 * many errors were actually injected.
 */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

/* Number of times pmap_query_page_info had to retry.  Read-only. */
extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3956
3957 /*
3958 * A sysctl which causes all existing shared regions to become stale. They
3959 * will no longer be used by anything new and will be torn down as soon as
3960 * the last existing user exits. A write of non-zero value causes that to happen.
3961 * This should only be used by launchd, so we check that this is initproc.
3962 */
3963 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3964 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3965 {
3966 unsigned int value = 0;
3967 int changed = 0;
3968 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3969 if (error || !changed) {
3970 return error;
3971 }
3972 if (current_proc() != initproc) {
3973 return EPERM;
3974 }
3975
3976 vm_shared_region_pivot();
3977
3978 return 0;
3979 }
3980
/* Write-only trigger; handler enforces that the caller is initproc. */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* Counts of shadow objects that were forced vs. skipped during vm_object_shadow. */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
3994
3995
3996 #if DEBUG || DEVELOPMENT
3997 /*
3998 * A sysctl that can be used to corrupt a text page with an illegal instruction.
3999 * Used for testing text page self healing.
4000 */
4001 extern kern_return_t vm_corrupt_text_addr(uintptr_t);
4002 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4003 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4004 {
4005 uint64_t value = 0;
4006 int error = sysctl_handle_quad(oidp, &value, 0, req);
4007 if (error || !req->newptr) {
4008 return error;
4009 }
4010
4011 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
4012 return 0;
4013 } else {
4014 return EINVAL;
4015 }
4016 }
4017
/* Write-only, masked: the value is consumed by the handler, never read back. */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
4022
4023 #if DEBUG || DEVELOPMENT
4024 #if CONFIG_MAP_RANGES
4025 static int
4026 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4027 {
4028 #pragma unused(arg1, arg2, oidp)
4029 struct mach_vm_range range;
4030
4031 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4032 != KERN_SUCCESS) {
4033 return EINVAL;
4034 }
4035
4036 return SYSCTL_OUT(req, &range, sizeof(range));
4037 }
4038
4039 static int
4040 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4041 {
4042 #pragma unused(arg1, arg2, oidp)
4043 struct mach_vm_range range;
4044
4045 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4046 != KERN_SUCCESS) {
4047 return EINVAL;
4048 }
4049
4050 return SYSCTL_OUT(req, &range, sizeof(range));
4051 }
4052
4053 /*
4054 * A sysctl that can be used to return ranges for the current VM map.
4055 * Used for testing VM ranges.
4056 */
4057 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4058 0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
4059 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4060 0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
4061
4062 #endif /* CONFIG_MAP_RANGES */
4063 #endif /* DEBUG || DEVELOPMENT */
4064
/* Empty placeholder block; intentionally left with no contents. */
#if DEBUG || DEVELOPMENT
#endif /* DEBUG || DEVELOPMENT */

/* Count of VM map calls rejected because an address range overflowed. */
extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
/*
 * NOTE(review): "oveflows" is a typo for "overflows", but the sysctl name is
 * user-visible ABI — confirm no consumers before considering a rename.
 */
SYSCTL_INT(_vm, OID_AUTO, map_range_oveflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");

/*
 * Compressor segment-fill lock statistics: how often a segment was filled
 * with vs. without lock contention, and the worst observed contention time
 * (split into seconds / nanoseconds).  Read-only.
 */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/* Compressor major-compaction pass tunables and statistics (macOS arm64 only). */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after;      /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
/* Debug knob: panic when operating on a VM object that is not alive. */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* Debug knob: suppress "fbdp" debugging panics when set. */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4105