1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/ecc.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/extmod_statistics.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/sdt.h>
50 #include <mach/task.h>
51 #include <mach/task_access.h>
52 #include <mach/task_special_ports.h>
53 #include <mach/time_value.h>
54 #include <mach/vm_map.h>
55 #include <mach/vm_param.h>
56 #include <mach/vm_prot.h>
57 #include <machine/machine_routines.h>
58
59 #include <sys/file_internal.h>
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/dir.h>
63 #include <sys/namei.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/vm.h>
67 #include <sys/file.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/mount.h>
70 #include <sys/xattr.h>
71 #include <sys/trace.h>
72 #include <sys/kernel.h>
73 #include <sys/ubc_internal.h>
74 #include <sys/user.h>
75 #include <sys/syslog.h>
76 #include <sys/stat.h>
77 #include <sys/sysproto.h>
78 #include <sys/mman.h>
79 #include <sys/sysctl.h>
80 #include <sys/cprotect.h>
81 #include <sys/kpi_socket.h>
82 #include <sys/kas_info.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/random.h>
86 #include <sys/code_signing.h>
87 #if NECP
88 #include <net/necp.h>
89 #endif /* NECP */
90 #if SKYWALK
91 #include <skywalk/os_channel.h>
92 #endif /* SKYWALK */
93
94 #include <security/audit/audit.h>
95 #include <security/mac.h>
96 #include <bsm/audit_kevents.h>
97
98 #include <kern/kalloc.h>
99 #include <vm/vm_map.h>
100 #include <vm/vm_kern.h>
101 #include <vm/vm_pageout.h>
102
103 #include <mach/shared_region.h>
104 #include <vm/vm_shared_region.h>
105
106 #include <vm/vm_dyld_pager.h>
107
108 #include <vm/vm_protos.h>
109
110 #include <sys/kern_memorystatus.h>
111 #include <sys/kern_memorystatus_freeze.h>
112 #include <sys/proc_internal.h>
113
114 #include <mach-o/fixup-chains.h>
115
116 #if CONFIG_MACF
117 #include <security/mac_framework.h>
118 #endif
119
120 #include <kern/bits.h>
121
122 #if CONFIG_CSR
123 #include <sys/csr.h>
124 #endif /* CONFIG_CSR */
125 #include <sys/trust_caches.h>
126 #include <libkern/amfi/amfi.h>
127 #include <IOKit/IOBSD.h>
128
/* Debug toggle for tracing "apple protect" (encrypted-binary) pager activity in vm_map. */
#if VM_MAP_DEBUG_APPLE_PROTECT
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

/* Debug toggle for tracing 4K-page ("fourk") mapping activity in vm_map. */
#if VM_MAP_DEBUG_FOURK
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
136
137 #if DEVELOPMENT || DEBUG
138
139 static int
140 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
141 {
142 #pragma unused(arg1, arg2)
143 vm_offset_t kaddr;
144 kern_return_t kr;
145 int error = 0;
146 int size = 0;
147
148 error = sysctl_handle_int(oidp, &size, 0, req);
149 if (error || !req->newptr) {
150 return error;
151 }
152
153 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
154 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
155
156 if (kr == KERN_SUCCESS) {
157 kmem_free(kernel_map, kaddr, size);
158 }
159
160 return error;
161 }
162
/* vm.kmem_alloc_contig: write-only (masked) test hook, see handler above. */
SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");

/* vm.region_footprint: any process may toggle footprint-style region reporting. */
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
168
169 static int
170 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
171 {
172 #pragma unused(arg1, arg2, oidp)
173 kmem_gobj_stats stats = kmem_get_gobj_stats();
174
175 return SYSCTL_OUT(req, &stats, sizeof(stats));
176 }
177
/* vm.sysctl_kmem_gobj_stats: read-only struct ("S,kmem_gobj_stats") export. */
SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");

#endif /* DEVELOPMENT || DEBUG */
183
184 static int
185 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
186 {
187 #pragma unused(arg1, arg2, oidp)
188 int error = 0;
189 int value;
190
191 value = task_self_region_footprint();
192 error = SYSCTL_OUT(req, &value, sizeof(int));
193 if (error) {
194 return error;
195 }
196
197 if (!req->newptr) {
198 return 0;
199 }
200
201 error = SYSCTL_IN(req, &value, sizeof(int));
202 if (error) {
203 return error;
204 }
205 task_self_region_footprint_set(value);
206 return 0;
207 }
208 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
209
/*
 * vm.self_region_page_size: read or set the page size the current thread
 * uses when reporting VM regions.  Accepted values are 0, 4096 and 16384;
 * 0 appears to reset to the default (bit_first(0) — verify against
 * thread_self_region_page_shift_set()'s handling of a negative shift).
 */
static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	/* current setting, reconstructed from the thread's page shift */
	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		/* read-only query: done */
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	/* only 0 (reset), 4K and 16K are meaningful page sizes here */
	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	/* without mixed-page-size support, only the map's native size is allowed */
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
246
247
#if DEVELOPMENT || DEBUG
/* When set, executing unsigned code panics instead of just being reported. */
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");

extern int vm_log_xnu_user_debug;
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
#endif /* DEVELOPMENT || DEBUG */

/* Read-only counters of code-signing-relevant VM events. */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

/* apple_protect pager population counters and its cache-size knob. */
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
267
#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

/* Verbose Mach-O load diagnostics. */
extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
/* Counters for translation-table pages held on the pmap free lists. */
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#if DEVELOPMENT || DEBUG
/* ASID (address space ID) flush/hit/miss counters from the pmap layer. */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */
319
/* Read-only counters of vm_object collapse / copy-on-write events (vm_counters). */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
/* Gate for the (expensive) shadow-chain-depth scan below. */
static int vm_shadow_max_enabled = 0; /* Disabled by default */
extern int proc_shadow_max(void);
332 static int
333 vm_shadow_max SYSCTL_HANDLER_ARGS
334 {
335 #pragma unused(arg1, arg2, oidp)
336 int value = 0;
337
338 if (vm_shadow_max_enabled) {
339 value = proc_shadow_max();
340 }
341
342 return SYSCTL_OUT(req, &value, sizeof(value));
343 }
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

/* vm.vm_shadow_max_enabled: enable/disable the scan performed by vm.vm_shadow_max. */
SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/* Forward declaration; defined below, kept noinline for stackshot visibility. */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
 */
358
#if DEVELOPMENT || DEBUG
/* Debug overrides for whether stack/data pages may be made executable. */
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */

/* Human-readable protection names, indexed by (prot & VM_PROT_ALL). */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
377
378 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)379 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
380 {
381 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
382 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
383 }
384
385 /*
386 * shared_region_unnest_logging: level of logging of unnesting events
387 * 0 - no logging
388 * 1 - throttled logging of unexpected unnesting events (default)
389 * 2 - unthrottled logging of unexpected unnesting events
390 * 3+ - unthrottled logging of all unnesting events
391 */
392 int shared_region_unnest_logging = 1;
393
394 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
395 &shared_region_unnest_logging, 0, "");
396
397 int vm_shared_region_unnest_log_interval = 10;
398 int shared_region_unnest_log_count_threshold = 5;
399
400
#if XNU_TARGET_OS_OSX

/* Whether to restrict shared-cache mapping to the directories listed below. */
#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0; /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

/* Permitted dyld shared cache locations (macOS); NULL-terminated. */
static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
/* Permitted dyld shared cache locations (embedded); NULL-terminated. */
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

/* Permitted DriverKit shared cache locations; NULL-terminated. */
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};
438
#ifndef SECURE_KERNEL
/*
 * vm.enforce_shared_cache_dir: writable toggle for scdir_enforce, gated by
 * SIP (CSR) when that is configured in.
 */
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	/* changes are only allowed when SIP permits unrestricted filesystem access */
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}

SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif
453
/*
 * Rate-limiting state for log_unnest_badness().  These aren't thread safe,
 * but are sufficient unto the task: a lost update merely perturbs throttling.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;
459
460 void
log_unnest_badness(vm_map_t m,vm_map_offset_t s,vm_map_offset_t e,boolean_t is_nested_map,vm_map_offset_t lowest_unnestable_addr)461 log_unnest_badness(
462 vm_map_t m,
463 vm_map_offset_t s,
464 vm_map_offset_t e,
465 boolean_t is_nested_map,
466 vm_map_offset_t lowest_unnestable_addr)
467 {
468 struct timeval tv;
469
470 if (shared_region_unnest_logging == 0) {
471 return;
472 }
473
474 if (shared_region_unnest_logging <= 2 &&
475 is_nested_map &&
476 s >= lowest_unnestable_addr) {
477 /*
478 * Unnesting of writable map entries is fine.
479 */
480 return;
481 }
482
483 if (shared_region_unnest_logging <= 1) {
484 microtime(&tv);
485 if ((tv.tv_sec - last_unnest_log_time) <
486 vm_shared_region_unnest_log_interval) {
487 if (shared_region_unnest_log_count++ >
488 shared_region_unnest_log_count_threshold) {
489 return;
490 }
491 } else {
492 last_unnest_log_time = tv.tv_sec;
493 shared_region_unnest_log_count = 0;
494 }
495 }
496
497 DTRACE_VM4(log_unnest_badness,
498 vm_map_t, m,
499 vm_map_offset_t, s,
500 vm_map_offset_t, e,
501 vm_map_offset_t, lowest_unnestable_addr);
502 printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
503 }
504
505 uint64_t
vm_purge_filebacked_pagers(void)506 vm_purge_filebacked_pagers(void)
507 {
508 uint64_t pages_purged;
509
510 pages_purged = 0;
511 pages_purged += apple_protect_pager_purge_all();
512 pages_purged += shared_region_pager_purge_all();
513 pages_purged += dyld_pager_purge_all();
514 #if DEVELOPMENT || DEBUG
515 printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
516 #endif /* DEVELOPMENT || DEBUG */
517 return pages_purged;
518 }
519
520 int
useracc(user_addr_t addr,user_size_t len,int prot)521 useracc(
522 user_addr_t addr,
523 user_size_t len,
524 int prot)
525 {
526 vm_map_t map;
527
528 map = current_map();
529 return vm_map_check_protection(
530 map,
531 vm_map_trunc_page(addr,
532 vm_map_page_mask(map)),
533 vm_map_round_page(addr + len,
534 vm_map_page_mask(map)),
535 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
536 }
537
538 int
vslock(user_addr_t addr,user_size_t len)539 vslock(
540 user_addr_t addr,
541 user_size_t len)
542 {
543 kern_return_t kret;
544 vm_map_t map;
545
546 map = current_map();
547 kret = vm_map_wire_kernel(map,
548 vm_map_trunc_page(addr,
549 vm_map_page_mask(map)),
550 vm_map_round_page(addr + len,
551 vm_map_page_mask(map)),
552 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
553 FALSE);
554
555 switch (kret) {
556 case KERN_SUCCESS:
557 return 0;
558 case KERN_INVALID_ADDRESS:
559 case KERN_NO_SPACE:
560 return ENOMEM;
561 case KERN_PROTECTION_FAILURE:
562 return EACCES;
563 default:
564 return EINVAL;
565 }
566 }
567
568 int
vsunlock(user_addr_t addr,user_size_t len,__unused int dirtied)569 vsunlock(
570 user_addr_t addr,
571 user_size_t len,
572 __unused int dirtied)
573 {
574 #if FIXME /* [ */
575 pmap_t pmap;
576 vm_page_t pg;
577 vm_map_offset_t vaddr;
578 ppnum_t paddr;
579 #endif /* FIXME ] */
580 kern_return_t kret;
581 vm_map_t map;
582
583 map = current_map();
584
585 #if FIXME /* [ */
586 if (dirtied) {
587 pmap = get_task_pmap(current_task());
588 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
589 vaddr < vm_map_round_page(addr + len, PAGE_MASK);
590 vaddr += PAGE_SIZE) {
591 paddr = pmap_find_phys(pmap, vaddr);
592 pg = PHYS_TO_VM_PAGE(paddr);
593 vm_page_set_modified(pg);
594 }
595 }
596 #endif /* FIXME ] */
597 #ifdef lint
598 dirtied++;
599 #endif /* lint */
600 kret = vm_map_unwire(map,
601 vm_map_trunc_page(addr,
602 vm_map_page_mask(map)),
603 vm_map_round_page(addr + len,
604 vm_map_page_mask(map)),
605 FALSE);
606 switch (kret) {
607 case KERN_SUCCESS:
608 return 0;
609 case KERN_INVALID_ADDRESS:
610 case KERN_NO_SPACE:
611 return ENOMEM;
612 case KERN_PROTECTION_FAILURE:
613 return EACCES;
614 default:
615 return EINVAL;
616 }
617 }
618
619 int
subyte(user_addr_t addr,int byte)620 subyte(
621 user_addr_t addr,
622 int byte)
623 {
624 char character;
625
626 character = (char)byte;
627 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
628 }
629
630 int
suibyte(user_addr_t addr,int byte)631 suibyte(
632 user_addr_t addr,
633 int byte)
634 {
635 char character;
636
637 character = (char)byte;
638 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
639 }
640
641 int
fubyte(user_addr_t addr)642 fubyte(user_addr_t addr)
643 {
644 unsigned char byte;
645
646 if (copyin(addr, (void *) &byte, sizeof(char))) {
647 return -1;
648 }
649 return byte;
650 }
651
652 int
fuibyte(user_addr_t addr)653 fuibyte(user_addr_t addr)
654 {
655 unsigned char byte;
656
657 if (copyin(addr, (void *) &(byte), sizeof(char))) {
658 return -1;
659 }
660 return byte;
661 }
662
663 int
suword(user_addr_t addr,long word)664 suword(
665 user_addr_t addr,
666 long word)
667 {
668 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
669 }
670
671 long
fuword(user_addr_t addr)672 fuword(user_addr_t addr)
673 {
674 long word = 0;
675
676 if (copyin(addr, (void *) &word, sizeof(int))) {
677 return -1;
678 }
679 return word;
680 }
681
682 /* suiword and fuiword are the same as suword and fuword, respectively */
683
684 int
suiword(user_addr_t addr,long word)685 suiword(
686 user_addr_t addr,
687 long word)
688 {
689 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
690 }
691
692 long
fuiword(user_addr_t addr)693 fuiword(user_addr_t addr)
694 {
695 long word = 0;
696
697 if (copyin(addr, (void *) &word, sizeof(int))) {
698 return -1;
699 }
700 return word;
701 }
702
703 /*
704 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
705 * fetching and setting of process-sized size_t and pointer values.
706 */
707 int
sulong(user_addr_t addr,int64_t word)708 sulong(user_addr_t addr, int64_t word)
709 {
710 if (IS_64BIT_PROCESS(current_proc())) {
711 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
712 } else {
713 return suiword(addr, (long)word);
714 }
715 }
716
717 int64_t
fulong(user_addr_t addr)718 fulong(user_addr_t addr)
719 {
720 int64_t longword;
721
722 if (IS_64BIT_PROCESS(current_proc())) {
723 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
724 return -1;
725 }
726 return longword;
727 } else {
728 return (int64_t)fuiword(addr);
729 }
730 }
731
732 int
suulong(user_addr_t addr,uint64_t uword)733 suulong(user_addr_t addr, uint64_t uword)
734 {
735 if (IS_64BIT_PROCESS(current_proc())) {
736 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
737 } else {
738 return suiword(addr, (uint32_t)uword);
739 }
740 }
741
742 uint64_t
fuulong(user_addr_t addr)743 fuulong(user_addr_t addr)
744 {
745 uint64_t ulongword;
746
747 if (IS_64BIT_PROCESS(current_proc())) {
748 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
749 return -1ULL;
750 }
751 return ulongword;
752 } else {
753 return (uint64_t)fuiword(addr);
754 }
755 }
756
757 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)758 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
759 {
760 return ENOTSUP;
761 }
762
763 /*
764 * pid_for_task
765 *
766 * Find the BSD process ID for the Mach task associated with the given Mach port
767 * name
768 *
769 * Parameters: args User argument descriptor (see below)
770 *
771 * Indirect parameters: args->t Mach port name
772 * args->pid Process ID (returned value; see below)
773 *
 * Returns: KERN_SUCCESS Success
775 * KERN_FAILURE Not success
776 *
777 * Implicit returns: args->pid Process ID
778 *
779 */
780 kern_return_t
pid_for_task(struct pid_for_task_args * args)781 pid_for_task(
782 struct pid_for_task_args *args)
783 {
784 mach_port_name_t t = args->t;
785 user_addr_t pid_addr = args->pid;
786 proc_t p;
787 task_t t1;
788 int pid = -1;
789 kern_return_t err = KERN_SUCCESS;
790
791 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
792 AUDIT_ARG(mach_port1, t);
793
794 t1 = port_name_to_task_name(t);
795
796 if (t1 == TASK_NULL) {
797 err = KERN_FAILURE;
798 goto pftout;
799 } else {
800 p = get_bsdtask_info(t1);
801 if (p) {
802 pid = proc_pid(p);
803 err = KERN_SUCCESS;
804 } else if (task_is_a_corpse(t1)) {
805 pid = task_pid(t1);
806 err = KERN_SUCCESS;
807 } else {
808 err = KERN_FAILURE;
809 }
810 }
811 task_deallocate(t1);
812 pftout:
813 AUDIT_ARG(pid, pid);
814 (void) copyout((char *) &pid, pid_addr, sizeof(int));
815 AUDIT_MACH_SYSCALL_EXIT(err);
816 return err;
817 }
818
819 /*
820 *
821 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
822 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
823 *
824 */
825 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
826
827 /*
828 * Routine: task_for_pid_posix_check
829 * Purpose:
830 * Verify that the current process should be allowed to
831 * get the target process's task port. This is only
832 * permitted if:
833 * - The current process is root
834 * OR all of the following are true:
835 * - The target process's real, effective, and saved uids
836 * are the same as the current proc's euid,
837 * - The target process's group set is a subset of the
838 * calling process's group set, and
839 * - The target process hasn't switched credentials.
840 *
841 * Returns: TRUE: permitted
842 * FALSE: denied
843 */
844 static int
task_for_pid_posix_check(proc_t target)845 task_for_pid_posix_check(proc_t target)
846 {
847 kauth_cred_t targetcred, mycred;
848 bool checkcredentials;
849 uid_t myuid;
850 int allowed;
851
852 /* No task_for_pid on bad targets */
853 if (target->p_stat == SZOMB) {
854 return FALSE;
855 }
856
857 mycred = kauth_cred_get();
858 myuid = kauth_cred_getuid(mycred);
859
860 /* If we're running as root, the check passes */
861 if (kauth_cred_issuser(mycred)) {
862 return TRUE;
863 }
864
865 /* We're allowed to get our own task port */
866 if (target == current_proc()) {
867 return TRUE;
868 }
869
870 /*
871 * Under DENY, only root can get another proc's task port,
872 * so no more checks are needed.
873 */
874 if (tfp_policy == KERN_TFP_POLICY_DENY) {
875 return FALSE;
876 }
877
878 targetcred = kauth_cred_proc_ref(target);
879 allowed = TRUE;
880
881 checkcredentials = !proc_is_third_party_debuggable_driver(target);
882
883 if (checkcredentials) {
884 /* Do target's ruid, euid, and saved uid match my euid? */
885 if ((kauth_cred_getuid(targetcred) != myuid) ||
886 (kauth_cred_getruid(targetcred) != myuid) ||
887 (kauth_cred_getsvuid(targetcred) != myuid)) {
888 allowed = FALSE;
889 goto out;
890 }
891 /* Are target's groups a subset of my groups? */
892 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
893 allowed == 0) {
894 allowed = FALSE;
895 goto out;
896 }
897 }
898
899 /* Has target switched credentials? */
900 if (target->p_flag & P_SUGID) {
901 allowed = FALSE;
902 goto out;
903 }
904
905 out:
906 kauth_cred_unref(&targetcred);
907 return allowed;
908 }
909
910 /*
911 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
912 *
913 * Description: Waits for the user space daemon to respond to the request
914 * we made. Function declared non inline to be visible in
915 * stackshots and spindumps as well as debugging.
916 */
917 __attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(mach_port_t task_access_port,int32_t calling_pid,uint32_t calling_gid,int32_t target_pid,mach_task_flavor_t flavor)918 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
919 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
920 {
921 return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
922 }
923
924 /*
925 * Routine: task_for_pid
926 * Purpose:
927 * Get the task port for another "process", named by its
928 * process ID on the same host as "target_task".
929 *
930 * Only permitted to privileged processes, or processes
931 * with the same user ID.
932 *
 * Note: if pid == 0, an error is returned no matter who is calling.
934 *
935 * XXX This should be a BSD system call, not a Mach trap!!!
936 */
937 kern_return_t
task_for_pid(struct task_for_pid_args * args)938 task_for_pid(
939 struct task_for_pid_args *args)
940 {
941 mach_port_name_t target_tport = args->target_tport;
942 int pid = args->pid;
943 user_addr_t task_addr = args->t;
944 proc_t p = PROC_NULL;
945 task_t t1 = TASK_NULL;
946 task_t task = TASK_NULL;
947 mach_port_name_t tret = MACH_PORT_NULL;
948 ipc_port_t tfpport = MACH_PORT_NULL;
949 void * sright = NULL;
950 int error = 0;
951 boolean_t is_current_proc = FALSE;
952 struct proc_ident pident = {0};
953
954 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
955 AUDIT_ARG(pid, pid);
956 AUDIT_ARG(mach_port1, target_tport);
957
958 /* Always check if pid == 0 */
959 if (pid == 0) {
960 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
961 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
962 return KERN_FAILURE;
963 }
964
965 t1 = port_name_to_task(target_tport);
966 if (t1 == TASK_NULL) {
967 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
968 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
969 return KERN_FAILURE;
970 }
971
972
973 p = proc_find(pid);
974 if (p == PROC_NULL) {
975 error = KERN_FAILURE;
976 goto tfpout;
977 }
978 pident = proc_ident(p);
979 is_current_proc = (p == current_proc());
980
981 #if CONFIG_AUDIT
982 AUDIT_ARG(process, p);
983 #endif
984
985 if (!(task_for_pid_posix_check(p))) {
986 error = KERN_FAILURE;
987 goto tfpout;
988 }
989
990 if (proc_task(p) == TASK_NULL) {
991 error = KERN_SUCCESS;
992 goto tfpout;
993 }
994
995 /*
996 * Grab a task reference and drop the proc reference as the proc ref
997 * shouldn't be held accross upcalls.
998 */
999 task = proc_task(p);
1000 task_reference(task);
1001
1002 proc_rele(p);
1003 p = PROC_NULL;
1004
1005 #if CONFIG_MACF
1006 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1007 if (error) {
1008 error = KERN_FAILURE;
1009 goto tfpout;
1010 }
1011 #endif
1012
1013 /* If we aren't root and target's task access port is set... */
1014 if (!kauth_cred_issuser(kauth_cred_get()) &&
1015 !is_current_proc &&
1016 (task_get_task_access_port(task, &tfpport) == 0) &&
1017 (tfpport != IPC_PORT_NULL)) {
1018 if (tfpport == IPC_PORT_DEAD) {
1019 error = KERN_PROTECTION_FAILURE;
1020 goto tfpout;
1021 }
1022
1023 /* Call up to the task access server */
1024 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1025 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1026
1027 if (error != MACH_MSG_SUCCESS) {
1028 if (error == MACH_RCV_INTERRUPTED) {
1029 error = KERN_ABORTED;
1030 } else {
1031 error = KERN_FAILURE;
1032 }
1033 goto tfpout;
1034 }
1035 }
1036
1037 /* Grant task port access */
1038 extmod_statistics_incr_task_for_pid(task);
1039
1040 /* this reference will be consumed during conversion */
1041 task_reference(task);
1042 if (task == current_task()) {
1043 /* return pinned self if current_task() so equality check with mach_task_self_ passes */
1044 sright = (void *)convert_task_to_port_pinned(task);
1045 } else {
1046 sright = (void *)convert_task_to_port(task);
1047 }
1048 /* extra task ref consumed */
1049
1050 /*
1051 * Check if the task has been corpsified. We must do so after conversion
1052 * since we don't hold locks and may have grabbed a corpse control port
1053 * above which will prevent no-senders notification delivery.
1054 */
1055 if (task_is_a_corpse(task)) {
1056 ipc_port_release_send(sright);
1057 error = KERN_FAILURE;
1058 goto tfpout;
1059 }
1060
1061 tret = ipc_port_copyout_send(
1062 sright,
1063 get_task_ipcspace(current_task()));
1064
1065 error = KERN_SUCCESS;
1066
1067 tfpout:
1068 task_deallocate(t1);
1069 AUDIT_ARG(mach_port2, tret);
1070 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1071
1072 if (tfpport != IPC_PORT_NULL) {
1073 ipc_port_release_send(tfpport);
1074 }
1075 if (task != TASK_NULL) {
1076 task_deallocate(task);
1077 }
1078 if (p != PROC_NULL) {
1079 proc_rele(p);
1080 }
1081 AUDIT_MACH_SYSCALL_EXIT(error);
1082 return error;
1083 }
1084
1085 /*
1086 * Routine: task_name_for_pid
1087 * Purpose:
1088 * Get the task name port for another "process", named by its
1089 * process ID on the same host as "target_task".
1090 *
1091 * Only permitted to privileged processes, or processes
1092 * with the same user ID.
1093 *
1094 * XXX This should be a BSD system call, not a Mach trap!!!
1095 */
1096
1097 kern_return_t
task_name_for_pid(struct task_name_for_pid_args * args)1098 task_name_for_pid(
1099 struct task_name_for_pid_args *args)
1100 {
1101 mach_port_name_t target_tport = args->target_tport;
1102 int pid = args->pid;
1103 user_addr_t task_addr = args->t;
1104 proc_t p = PROC_NULL;
1105 task_t t1 = TASK_NULL;
1106 mach_port_name_t tret = MACH_PORT_NULL;
1107 void * sright;
1108 int error = 0, refheld = 0;
1109 kauth_cred_t target_cred;
1110
1111 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
1112 AUDIT_ARG(pid, pid);
1113 AUDIT_ARG(mach_port1, target_tport);
1114
1115 t1 = port_name_to_task(target_tport);
1116 if (t1 == TASK_NULL) {
1117 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1118 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1119 return KERN_FAILURE;
1120 }
1121
1122 p = proc_find(pid);
1123 if (p != PROC_NULL) {
1124 AUDIT_ARG(process, p);
1125 target_cred = kauth_cred_proc_ref(p);
1126 refheld = 1;
1127
1128 if ((p->p_stat != SZOMB)
1129 && ((current_proc() == p)
1130 || kauth_cred_issuser(kauth_cred_get())
1131 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
1132 ((kauth_cred_getruid(target_cred) == kauth_getruid())))
1133 || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
1134 )) {
1135 if (proc_task(p) != TASK_NULL) {
1136 struct proc_ident pident = proc_ident(p);
1137
1138 task_t task = proc_task(p);
1139
1140 task_reference(task);
1141 proc_rele(p);
1142 p = PROC_NULL;
1143 #if CONFIG_MACF
1144 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
1145 if (error) {
1146 task_deallocate(task);
1147 goto noperm;
1148 }
1149 #endif
1150 sright = (void *)convert_task_name_to_port(task);
1151 task = NULL;
1152 tret = ipc_port_copyout_send(sright,
1153 get_task_ipcspace(current_task()));
1154 } else {
1155 tret = MACH_PORT_NULL;
1156 }
1157
1158 AUDIT_ARG(mach_port2, tret);
1159 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1160 task_deallocate(t1);
1161 error = KERN_SUCCESS;
1162 goto tnfpout;
1163 }
1164 }
1165
1166 #if CONFIG_MACF
1167 noperm:
1168 #endif
1169 task_deallocate(t1);
1170 tret = MACH_PORT_NULL;
1171 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1172 error = KERN_FAILURE;
1173 tnfpout:
1174 if (refheld != 0) {
1175 kauth_cred_unref(&target_cred);
1176 }
1177 if (p != PROC_NULL) {
1178 proc_rele(p);
1179 }
1180 AUDIT_MACH_SYSCALL_EXIT(error);
1181 return error;
1182 }
1183
1184 /*
1185 * Routine: task_inspect_for_pid
1186 * Purpose:
1187 * Get the task inspect port for another "process", named by its
1188 * process ID on the same host as "target_task".
1189 */
1190 int
task_inspect_for_pid(struct proc * p __unused,struct task_inspect_for_pid_args * args,int * ret)1191 task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
1192 {
1193 mach_port_name_t target_tport = args->target_tport;
1194 int pid = args->pid;
1195 user_addr_t task_addr = args->t;
1196
1197 proc_t proc = PROC_NULL;
1198 task_t t1 = TASK_NULL;
1199 task_inspect_t task_insp = TASK_INSPECT_NULL;
1200 mach_port_name_t tret = MACH_PORT_NULL;
1201 ipc_port_t tfpport = MACH_PORT_NULL;
1202 int error = 0;
1203 void *sright = NULL;
1204 boolean_t is_current_proc = FALSE;
1205 struct proc_ident pident = {0};
1206
1207 /* Disallow inspect port for kernel_task */
1208 if (pid == 0) {
1209 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1210 return EPERM;
1211 }
1212
1213 t1 = port_name_to_task(target_tport);
1214 if (t1 == TASK_NULL) {
1215 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1216 return EINVAL;
1217 }
1218
1219 proc = proc_find(pid);
1220 if (proc == PROC_NULL) {
1221 error = ESRCH;
1222 goto tifpout;
1223 }
1224 pident = proc_ident(proc);
1225 is_current_proc = (proc == current_proc());
1226
1227 if (!(task_for_pid_posix_check(proc))) {
1228 error = EPERM;
1229 goto tifpout;
1230 }
1231
1232 task_insp = proc_task(proc);
1233 if (task_insp == TASK_INSPECT_NULL) {
1234 goto tifpout;
1235 }
1236
1237 /*
1238 * Grab a task reference and drop the proc reference before making any upcalls.
1239 */
1240 task_reference(task_insp);
1241
1242 proc_rele(proc);
1243 proc = PROC_NULL;
1244
1245 #if CONFIG_MACF
1246 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
1247 if (error) {
1248 error = EPERM;
1249 goto tifpout;
1250 }
1251 #endif
1252
1253 /* If we aren't root and target's task access port is set... */
1254 if (!kauth_cred_issuser(kauth_cred_get()) &&
1255 !is_current_proc &&
1256 (task_get_task_access_port(task_insp, &tfpport) == 0) &&
1257 (tfpport != IPC_PORT_NULL)) {
1258 if (tfpport == IPC_PORT_DEAD) {
1259 error = EACCES;
1260 goto tifpout;
1261 }
1262
1263
1264 /* Call up to the task access server */
1265 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1266 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
1267
1268 if (error != MACH_MSG_SUCCESS) {
1269 if (error == MACH_RCV_INTERRUPTED) {
1270 error = EINTR;
1271 } else {
1272 error = EPERM;
1273 }
1274 goto tifpout;
1275 }
1276 }
1277
1278 /* Check if the task has been corpsified */
1279 if (task_is_a_corpse(task_insp)) {
1280 error = EACCES;
1281 goto tifpout;
1282 }
1283
1284 /* could be IP_NULL, consumes a ref */
1285 sright = (void*) convert_task_inspect_to_port(task_insp);
1286 task_insp = TASK_INSPECT_NULL;
1287 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1288
1289 tifpout:
1290 task_deallocate(t1);
1291 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1292 if (proc != PROC_NULL) {
1293 proc_rele(proc);
1294 }
1295 if (tfpport != IPC_PORT_NULL) {
1296 ipc_port_release_send(tfpport);
1297 }
1298 if (task_insp != TASK_INSPECT_NULL) {
1299 task_deallocate(task_insp);
1300 }
1301
1302 *ret = error;
1303 return error;
1304 }
1305
1306 /*
1307 * Routine: task_read_for_pid
1308 * Purpose:
1309 * Get the task read port for another "process", named by its
1310 * process ID on the same host as "target_task".
1311 */
1312 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1313 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1314 {
1315 mach_port_name_t target_tport = args->target_tport;
1316 int pid = args->pid;
1317 user_addr_t task_addr = args->t;
1318
1319 proc_t proc = PROC_NULL;
1320 task_t t1 = TASK_NULL;
1321 task_read_t task_read = TASK_READ_NULL;
1322 mach_port_name_t tret = MACH_PORT_NULL;
1323 ipc_port_t tfpport = MACH_PORT_NULL;
1324 int error = 0;
1325 void *sright = NULL;
1326 boolean_t is_current_proc = FALSE;
1327 struct proc_ident pident = {0};
1328
1329 /* Disallow read port for kernel_task */
1330 if (pid == 0) {
1331 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1332 return EPERM;
1333 }
1334
1335 t1 = port_name_to_task(target_tport);
1336 if (t1 == TASK_NULL) {
1337 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1338 return EINVAL;
1339 }
1340
1341 proc = proc_find(pid);
1342 if (proc == PROC_NULL) {
1343 error = ESRCH;
1344 goto trfpout;
1345 }
1346 pident = proc_ident(proc);
1347 is_current_proc = (proc == current_proc());
1348
1349 if (!(task_for_pid_posix_check(proc))) {
1350 error = EPERM;
1351 goto trfpout;
1352 }
1353
1354 task_read = proc_task(proc);
1355 if (task_read == TASK_INSPECT_NULL) {
1356 goto trfpout;
1357 }
1358
1359 /*
1360 * Grab a task reference and drop the proc reference before making any upcalls.
1361 */
1362 task_reference(task_read);
1363
1364 proc_rele(proc);
1365 proc = PROC_NULL;
1366
1367 #if CONFIG_MACF
1368 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1369 if (error) {
1370 error = EPERM;
1371 goto trfpout;
1372 }
1373 #endif
1374
1375 /* If we aren't root and target's task access port is set... */
1376 if (!kauth_cred_issuser(kauth_cred_get()) &&
1377 !is_current_proc &&
1378 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1379 (tfpport != IPC_PORT_NULL)) {
1380 if (tfpport == IPC_PORT_DEAD) {
1381 error = EACCES;
1382 goto trfpout;
1383 }
1384
1385
1386 /* Call up to the task access server */
1387 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1388 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1389
1390 if (error != MACH_MSG_SUCCESS) {
1391 if (error == MACH_RCV_INTERRUPTED) {
1392 error = EINTR;
1393 } else {
1394 error = EPERM;
1395 }
1396 goto trfpout;
1397 }
1398 }
1399
1400 /* Check if the task has been corpsified */
1401 if (task_is_a_corpse(task_read)) {
1402 error = EACCES;
1403 goto trfpout;
1404 }
1405
1406 /* could be IP_NULL, consumes a ref */
1407 sright = (void*) convert_task_read_to_port(task_read);
1408 task_read = TASK_READ_NULL;
1409 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1410
1411 trfpout:
1412 task_deallocate(t1);
1413 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1414 if (proc != PROC_NULL) {
1415 proc_rele(proc);
1416 }
1417 if (tfpport != IPC_PORT_NULL) {
1418 ipc_port_release_send(tfpport);
1419 }
1420 if (task_read != TASK_READ_NULL) {
1421 task_deallocate(task_read);
1422 }
1423
1424 *ret = error;
1425 return error;
1426 }
1427
1428 kern_return_t
pid_suspend(struct proc * p __unused,struct pid_suspend_args * args,int * ret)1429 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1430 {
1431 task_t target = NULL;
1432 proc_t targetproc = PROC_NULL;
1433 int pid = args->pid;
1434 int error = 0;
1435 mach_port_t tfpport = MACH_PORT_NULL;
1436
1437 if (pid == 0) {
1438 error = EPERM;
1439 goto out;
1440 }
1441
1442 targetproc = proc_find(pid);
1443 if (targetproc == PROC_NULL) {
1444 error = ESRCH;
1445 goto out;
1446 }
1447
1448 if (!task_for_pid_posix_check(targetproc) &&
1449 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1450 error = EPERM;
1451 goto out;
1452 }
1453
1454 #if CONFIG_MACF
1455 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
1456 if (error) {
1457 error = EPERM;
1458 goto out;
1459 }
1460 #endif
1461
1462 target = proc_task(targetproc);
1463 #if XNU_TARGET_OS_OSX
1464 if (target != TASK_NULL) {
1465 /* If we aren't root and target's task access port is set... */
1466 if (!kauth_cred_issuser(kauth_cred_get()) &&
1467 targetproc != current_proc() &&
1468 (task_get_task_access_port(target, &tfpport) == 0) &&
1469 (tfpport != IPC_PORT_NULL)) {
1470 if (tfpport == IPC_PORT_DEAD) {
1471 error = EACCES;
1472 goto out;
1473 }
1474
1475 /* Call up to the task access server */
1476 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1477 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1478
1479 if (error != MACH_MSG_SUCCESS) {
1480 if (error == MACH_RCV_INTERRUPTED) {
1481 error = EINTR;
1482 } else {
1483 error = EPERM;
1484 }
1485 goto out;
1486 }
1487 }
1488 }
1489 #endif /* XNU_TARGET_OS_OSX */
1490
1491 task_reference(target);
1492 error = task_pidsuspend(target);
1493 if (error) {
1494 if (error == KERN_INVALID_ARGUMENT) {
1495 error = EINVAL;
1496 } else {
1497 error = EPERM;
1498 }
1499 }
1500 #if CONFIG_MEMORYSTATUS
1501 else {
1502 memorystatus_on_suspend(targetproc);
1503 }
1504 #endif
1505
1506 task_deallocate(target);
1507
1508 out:
1509 if (tfpport != IPC_PORT_NULL) {
1510 ipc_port_release_send(tfpport);
1511 }
1512
1513 if (targetproc != PROC_NULL) {
1514 proc_rele(targetproc);
1515 }
1516 *ret = error;
1517 return error;
1518 }
1519
1520 kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args * args)1521 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1522 {
1523 mach_port_name_t target_tport = args->target_tport;
1524 int pid = args->pid;
1525 user_addr_t task_addr = args->t;
1526 proc_t p = PROC_NULL;
1527 task_t t1 = TASK_NULL;
1528 task_t task = TASK_NULL;
1529 mach_port_name_t tret = MACH_PORT_NULL;
1530 ipc_port_t tfpport = MACH_PORT_NULL;
1531 ipc_port_t sright = NULL;
1532 int error = 0;
1533 boolean_t is_current_proc = FALSE;
1534 struct proc_ident pident = {0};
1535
1536 AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1537 AUDIT_ARG(pid, pid);
1538 AUDIT_ARG(mach_port1, target_tport);
1539
1540 /* Always check if pid == 0 */
1541 if (pid == 0) {
1542 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1543 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1544 return KERN_FAILURE;
1545 }
1546
1547 t1 = port_name_to_task(target_tport);
1548 if (t1 == TASK_NULL) {
1549 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1550 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1551 return KERN_FAILURE;
1552 }
1553
1554 p = proc_find(pid);
1555 if (p == PROC_NULL) {
1556 error = KERN_FAILURE;
1557 goto tfpout;
1558 }
1559 pident = proc_ident(p);
1560 is_current_proc = (p == current_proc());
1561
1562 #if CONFIG_AUDIT
1563 AUDIT_ARG(process, p);
1564 #endif
1565
1566 if (!(task_for_pid_posix_check(p))) {
1567 error = KERN_FAILURE;
1568 goto tfpout;
1569 }
1570
1571 if (proc_task(p) == TASK_NULL) {
1572 error = KERN_SUCCESS;
1573 goto tfpout;
1574 }
1575
1576 /*
1577 * Grab a task reference and drop the proc reference before making any upcalls.
1578 */
1579 task = proc_task(p);
1580 task_reference(task);
1581
1582 proc_rele(p);
1583 p = PROC_NULL;
1584
1585 if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
1586 #if CONFIG_MACF
1587 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1588 if (error) {
1589 error = KERN_FAILURE;
1590 goto tfpout;
1591 }
1592 #endif
1593
1594 /* If we aren't root and target's task access port is set... */
1595 if (!kauth_cred_issuser(kauth_cred_get()) &&
1596 !is_current_proc &&
1597 (task_get_task_access_port(task, &tfpport) == 0) &&
1598 (tfpport != IPC_PORT_NULL)) {
1599 if (tfpport == IPC_PORT_DEAD) {
1600 error = KERN_PROTECTION_FAILURE;
1601 goto tfpout;
1602 }
1603
1604
1605 /* Call up to the task access server */
1606 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1607 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1608
1609 if (error != MACH_MSG_SUCCESS) {
1610 if (error == MACH_RCV_INTERRUPTED) {
1611 error = KERN_ABORTED;
1612 } else {
1613 error = KERN_FAILURE;
1614 }
1615 goto tfpout;
1616 }
1617 }
1618 }
1619
1620 /* Check if the task has been corpsified */
1621 if (task_is_a_corpse(task)) {
1622 error = KERN_FAILURE;
1623 goto tfpout;
1624 }
1625
1626 error = task_get_debug_control_port(task, &sright);
1627 if (error != KERN_SUCCESS) {
1628 goto tfpout;
1629 }
1630
1631 tret = ipc_port_copyout_send(
1632 sright,
1633 get_task_ipcspace(current_task()));
1634
1635 error = KERN_SUCCESS;
1636
1637 tfpout:
1638 task_deallocate(t1);
1639 AUDIT_ARG(mach_port2, tret);
1640 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1641
1642 if (tfpport != IPC_PORT_NULL) {
1643 ipc_port_release_send(tfpport);
1644 }
1645 if (task != TASK_NULL) {
1646 task_deallocate(task);
1647 }
1648 if (p != PROC_NULL) {
1649 proc_rele(p);
1650 }
1651 AUDIT_MACH_SYSCALL_EXIT(error);
1652 return error;
1653 }
1654
1655 kern_return_t
pid_resume(struct proc * p __unused,struct pid_resume_args * args,int * ret)1656 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1657 {
1658 task_t target = NULL;
1659 proc_t targetproc = PROC_NULL;
1660 int pid = args->pid;
1661 int error = 0;
1662 mach_port_t tfpport = MACH_PORT_NULL;
1663
1664 if (pid == 0) {
1665 error = EPERM;
1666 goto out;
1667 }
1668
1669 targetproc = proc_find(pid);
1670 if (targetproc == PROC_NULL) {
1671 error = ESRCH;
1672 goto out;
1673 }
1674
1675 if (!task_for_pid_posix_check(targetproc) &&
1676 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1677 error = EPERM;
1678 goto out;
1679 }
1680
1681 #if CONFIG_MACF
1682 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
1683 if (error) {
1684 error = EPERM;
1685 goto out;
1686 }
1687 #endif
1688
1689 target = proc_task(targetproc);
1690 #if XNU_TARGET_OS_OSX
1691 if (target != TASK_NULL) {
1692 /* If we aren't root and target's task access port is set... */
1693 if (!kauth_cred_issuser(kauth_cred_get()) &&
1694 targetproc != current_proc() &&
1695 (task_get_task_access_port(target, &tfpport) == 0) &&
1696 (tfpport != IPC_PORT_NULL)) {
1697 if (tfpport == IPC_PORT_DEAD) {
1698 error = EACCES;
1699 goto out;
1700 }
1701
1702 /* Call up to the task access server */
1703 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1704 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1705
1706 if (error != MACH_MSG_SUCCESS) {
1707 if (error == MACH_RCV_INTERRUPTED) {
1708 error = EINTR;
1709 } else {
1710 error = EPERM;
1711 }
1712 goto out;
1713 }
1714 }
1715 }
1716 #endif /* XNU_TARGET_OS_OSX */
1717
1718 #if !XNU_TARGET_OS_OSX
1719 #if SOCKETS
1720 resume_proc_sockets(targetproc);
1721 #endif /* SOCKETS */
1722 #endif /* !XNU_TARGET_OS_OSX */
1723
1724 task_reference(target);
1725
1726 #if CONFIG_MEMORYSTATUS
1727 memorystatus_on_resume(targetproc);
1728 #endif
1729
1730 error = task_pidresume(target);
1731 if (error) {
1732 if (error == KERN_INVALID_ARGUMENT) {
1733 error = EINVAL;
1734 } else {
1735 if (error == KERN_MEMORY_ERROR) {
1736 psignal(targetproc, SIGKILL);
1737 error = EIO;
1738 } else {
1739 error = EPERM;
1740 }
1741 }
1742 }
1743
1744 task_deallocate(target);
1745
1746 out:
1747 if (tfpport != IPC_PORT_NULL) {
1748 ipc_port_release_send(tfpport);
1749 }
1750
1751 if (targetproc != PROC_NULL) {
1752 proc_rele(targetproc);
1753 }
1754
1755 *ret = error;
1756 return error;
1757 }
1758
1759 #if !XNU_TARGET_OS_OSX
1760 /*
1761 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1762 * When a process is specified, this call is blocking, otherwise we wake up the
1763 * freezer thread and do not block on a process being frozen.
1764 */
1765 kern_return_t
pid_hibernate(struct proc * p __unused,struct pid_hibernate_args * args,int * ret)1766 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1767 {
1768 int error = 0;
1769 proc_t targetproc = PROC_NULL;
1770 int pid = args->pid;
1771
1772 #ifndef CONFIG_FREEZE
1773 #pragma unused(pid)
1774 #else
1775
1776 /*
1777 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1778 */
1779
1780 if (pid >= 0) {
1781 targetproc = proc_find(pid);
1782
1783 if (targetproc == PROC_NULL) {
1784 error = ESRCH;
1785 goto out;
1786 }
1787
1788 if (!task_for_pid_posix_check(targetproc)) {
1789 error = EPERM;
1790 goto out;
1791 }
1792 }
1793
1794 #if CONFIG_MACF
1795 //Note that targetproc may be null
1796 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
1797 if (error) {
1798 error = EPERM;
1799 goto out;
1800 }
1801 #endif
1802
1803 if (pid == -2) {
1804 vm_pageout_anonymous_pages();
1805 } else if (pid == -1) {
1806 memorystatus_on_inactivity(targetproc);
1807 } else {
1808 error = memorystatus_freeze_process_sync(targetproc);
1809 }
1810
1811 out:
1812
1813 #endif /* CONFIG_FREEZE */
1814
1815 if (targetproc != PROC_NULL) {
1816 proc_rele(targetproc);
1817 }
1818 *ret = error;
1819 return error;
1820 }
1821 #endif /* !XNU_TARGET_OS_OSX */
1822
1823 #if SOCKETS
1824 int
networking_memstatus_callout(proc_t p,uint32_t status)1825 networking_memstatus_callout(proc_t p, uint32_t status)
1826 {
1827 struct fileproc *fp;
1828
1829 /*
1830 * proc list lock NOT held
1831 * proc lock NOT held
1832 * a reference on the proc has been held / shall be dropped by the caller.
1833 */
1834 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1835 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1836
1837 proc_fdlock(p);
1838
1839 fdt_foreach(fp, p) {
1840 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1841 #if NECP
1842 case DTYPE_NETPOLICY:
1843 necp_fd_memstatus(p, status,
1844 (struct necp_fd_data *)fp_get_data(fp));
1845 break;
1846 #endif /* NECP */
1847 #if SKYWALK
1848 case DTYPE_CHANNEL:
1849 kern_channel_memstatus(p, status,
1850 (struct kern_channel *)fp_get_data(fp));
1851 break;
1852 #endif /* SKYWALK */
1853 default:
1854 break;
1855 }
1856 }
1857 proc_fdunlock(p);
1858
1859 return 1;
1860 }
1861
1862 #if SKYWALK
1863 /*
1864 * Since we make multiple passes across the fileproc array, record the
1865 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases. If we find more,
1867 * we'll go to the slow path during second pass.
1868 */
1869 #define MAX_CHANNELS 8 /* should be more than enough */
1870 #endif /* SKYWALK */
1871
1872 static int
networking_defunct_callout(proc_t p,void * arg)1873 networking_defunct_callout(proc_t p, void *arg)
1874 {
1875 struct pid_shutdown_sockets_args *args = arg;
1876 int pid = args->pid;
1877 int level = args->level;
1878 struct fileproc *fp;
1879 #if SKYWALK
1880 int i;
1881 int channel_count = 0;
1882 struct kern_channel *channel_array[MAX_CHANNELS];
1883
1884 bzero(&channel_array, sizeof(channel_array));
1885 #endif /* SKYWALK */
1886
1887 proc_fdlock(p);
1888
1889 fdt_foreach(fp, p) {
1890 struct fileglob *fg = fp->fp_glob;
1891
1892 switch (FILEGLOB_DTYPE(fg)) {
1893 case DTYPE_SOCKET: {
1894 struct socket *so = (struct socket *)fg_get_data(fg);
1895 if (proc_getpid(p) == pid || so->last_pid == pid ||
1896 ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
1897 /* Call networking stack with socket and level */
1898 (void)socket_defunct(p, so, level);
1899 }
1900 break;
1901 }
1902 #if NECP
1903 case DTYPE_NETPOLICY:
1904 /* first pass: defunct necp and get stats for ntstat */
1905 if (proc_getpid(p) == pid) {
1906 necp_fd_defunct(p,
1907 (struct necp_fd_data *)fg_get_data(fg));
1908 }
1909 break;
1910 #endif /* NECP */
1911 #if SKYWALK
1912 case DTYPE_CHANNEL:
1913 /* first pass: get channels and total count */
1914 if (proc_getpid(p) == pid) {
1915 if (channel_count < MAX_CHANNELS) {
1916 channel_array[channel_count] =
1917 (struct kern_channel *)fg_get_data(fg);
1918 }
1919 ++channel_count;
1920 }
1921 break;
1922 #endif /* SKYWALK */
1923 default:
1924 break;
1925 }
1926 }
1927
1928 #if SKYWALK
1929 /*
1930 * Second pass: defunct channels/flows (after NECP). Handle
1931 * the common case of up to MAX_CHANNELS count with fast path,
1932 * and traverse the fileproc array again only if we exceed it.
1933 */
1934 if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
1935 ASSERT(proc_getpid(p) == pid);
1936 for (i = 0; i < channel_count; i++) {
1937 ASSERT(channel_array[i] != NULL);
1938 kern_channel_defunct(p, channel_array[i]);
1939 }
1940 } else if (channel_count != 0) {
1941 ASSERT(proc_getpid(p) == pid);
1942 fdt_foreach(fp, p) {
1943 struct fileglob *fg = fp->fp_glob;
1944
1945 if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
1946 kern_channel_defunct(p,
1947 (struct kern_channel *)fg_get_data(fg));
1948 }
1949 }
1950 }
1951 #endif /* SKYWALK */
1952 proc_fdunlock(p);
1953
1954 return PROC_RETURNED;
1955 }
1956
1957 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1958 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1959 {
1960 int error = 0;
1961 proc_t targetproc = PROC_NULL;
1962 int pid = args->pid;
1963 int level = args->level;
1964
1965 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1966 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1967 error = EINVAL;
1968 goto out;
1969 }
1970
1971 targetproc = proc_find(pid);
1972 if (targetproc == PROC_NULL) {
1973 error = ESRCH;
1974 goto out;
1975 }
1976
1977 if (!task_for_pid_posix_check(targetproc) &&
1978 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1979 error = EPERM;
1980 goto out;
1981 }
1982
1983 #if CONFIG_MACF
1984 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1985 if (error) {
1986 error = EPERM;
1987 goto out;
1988 }
1989 #endif
1990
1991 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1992 networking_defunct_callout, args, NULL, NULL);
1993
1994 out:
1995 if (targetproc != PROC_NULL) {
1996 proc_rele(targetproc);
1997 }
1998 *ret = error;
1999 return error;
2000 }
2001
2002 #endif /* SOCKETS */
2003
2004 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)2005 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
2006 __unused int arg2, struct sysctl_req *req)
2007 {
2008 int error = 0;
2009 int new_value;
2010
2011 error = SYSCTL_OUT(req, arg1, sizeof(int));
2012 if (error || req->newptr == USER_ADDR_NULL) {
2013 return error;
2014 }
2015
2016 if (!kauth_cred_issuser(kauth_cred_get())) {
2017 return EPERM;
2018 }
2019
2020 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2021 goto out;
2022 }
2023 if ((new_value == KERN_TFP_POLICY_DENY)
2024 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2025 tfp_policy = new_value;
2026 } else {
2027 error = EINVAL;
2028 }
2029 out:
2030 return error;
2031 }
2032
/* Read-only flag reporting whether this kernel was built SECURE_KERNEL. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy knob, guarded by sysctl_settfp_policy */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_* knobs: trace verbosity, version (RO), persistence */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
2051
2052 /*
2053 * shared_region_check_np:
2054 *
2055 * This system call is intended for dyld.
2056 *
2057 * dyld calls this when any process starts to see if the process's shared
2058 * region is already set up and ready to use.
2059 * This call returns the base address of the first mapping in the
2060 * process's shared region's first mapping.
2061 * dyld will then check what's mapped at that address.
2062 *
2063 * If the shared region is empty, dyld will then attempt to map the shared
2064 * cache file in the shared region via the shared_region_map_np() system call.
2065 *
2066 * If something's already mapped in the shared region, dyld will check if it
2067 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2069 * shared region.
2070 * If it doesn't match, dyld will unmap the shared region and map the shared
2071 * cache into the process's address space via mmap().
2072 *
2073 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2074 * the shared region. We will remove the shared_region reference from the task.
2075 *
2076 * ERROR VALUES
2077 * EINVAL no shared region
2078 * ENOMEM shared region is empty
2079 * EFAULT bad address for "start_address"
2080 */
2081 int
shared_region_check_np(__unused struct proc * p,struct shared_region_check_np_args * uap,__unused int * retvalp)2082 shared_region_check_np(
2083 __unused struct proc *p,
2084 struct shared_region_check_np_args *uap,
2085 __unused int *retvalp)
2086 {
2087 vm_shared_region_t shared_region;
2088 mach_vm_offset_t start_address = 0;
2089 int error = 0;
2090 kern_return_t kr;
2091 task_t task = current_task();
2092
2093 SHARED_REGION_TRACE_DEBUG(
2094 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
2095 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2096 proc_getpid(p), p->p_comm,
2097 (uint64_t)uap->start_address));
2098
2099 /*
2100 * Special value of start_address used to indicate that map_with_linking() should
2101 * no longer be allowed in this process
2102 */
2103 if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
2104 p->p_disallow_map_with_linking = TRUE;
2105 return 0;
2106 }
2107
2108 /* retrieve the current tasks's shared region */
2109 shared_region = vm_shared_region_get(task);
2110 if (shared_region != NULL) {
2111 /*
2112 * A NULL argument is used by dyld to indicate the task
2113 * has unmapped its shared region.
2114 */
2115 if (uap->start_address == 0) {
2116 /* unmap it first */
2117 vm_shared_region_remove(task, shared_region);
2118 vm_shared_region_set(task, NULL);
2119 } else {
2120 /* retrieve address of its first mapping... */
2121 kr = vm_shared_region_start_address(shared_region, &start_address, task);
2122 if (kr != KERN_SUCCESS) {
2123 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2124 "check_np(0x%llx) "
2125 "vm_shared_region_start_address() failed\n",
2126 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2127 proc_getpid(p), p->p_comm,
2128 (uint64_t)uap->start_address));
2129 error = ENOMEM;
2130 } else {
2131 #if __has_feature(ptrauth_calls)
2132 /*
2133 * Remap any section of the shared library that
2134 * has authenticated pointers into private memory.
2135 */
2136 if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
2137 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2138 "check_np(0x%llx) "
2139 "vm_shared_region_auth_remap() failed\n",
2140 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2141 proc_getpid(p), p->p_comm,
2142 (uint64_t)uap->start_address));
2143 error = ENOMEM;
2144 }
2145 #endif /* __has_feature(ptrauth_calls) */
2146
2147 /* ... and give it to the caller */
2148 if (error == 0) {
2149 error = copyout(&start_address,
2150 (user_addr_t) uap->start_address,
2151 sizeof(start_address));
2152 if (error != 0) {
2153 SHARED_REGION_TRACE_ERROR(
2154 ("shared_region: %p [%d(%s)] "
2155 "check_np(0x%llx) "
2156 "copyout(0x%llx) error %d\n",
2157 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2158 proc_getpid(p), p->p_comm,
2159 (uint64_t)uap->start_address, (uint64_t)start_address,
2160 error));
2161 }
2162 }
2163 }
2164 }
2165 vm_shared_region_deallocate(shared_region);
2166 } else {
2167 /* no shared region ! */
2168 error = EINVAL;
2169 }
2170
2171 SHARED_REGION_TRACE_DEBUG(
2172 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
2173 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2174 proc_getpid(p), p->p_comm,
2175 (uint64_t)uap->start_address, (uint64_t)start_address, error));
2176
2177 return error;
2178 }
2179
2180
2181 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2182 shared_region_copyin(
2183 struct proc *p,
2184 user_addr_t user_addr,
2185 unsigned int count,
2186 unsigned int element_size,
2187 void *kernel_data)
2188 {
2189 int error = 0;
2190 vm_size_t size = count * element_size;
2191
2192 error = copyin(user_addr, kernel_data, size);
2193 if (error) {
2194 SHARED_REGION_TRACE_ERROR(
2195 ("shared_region: %p [%d(%s)] map(): "
2196 "copyin(0x%llx, %ld) failed (error=%d)\n",
2197 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2198 proc_getpid(p), p->p_comm,
2199 (uint64_t)user_addr, (long)size, error));
2200 }
2201 return error;
2202 }
2203
2204 /*
2205 * A reasonable upper limit to prevent overflow of allocation/copyin.
2206 */
2207 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2208
2209 /* forward declaration */
2210 __attribute__((noinline))
2211 static void shared_region_map_and_slide_cleanup(
2212 struct proc *p,
2213 uint32_t files_count,
2214 struct _sr_file_mappings *sr_file_mappings,
2215 struct vm_shared_region *shared_region);
2216
2217 /*
2218 * Setup part of _shared_region_map_and_slide().
2219 * It had to be broken out of _shared_region_map_and_slide() to
2220 * prevent compiler inlining from blowing out the stack.
2221 */
2222 __attribute__((noinline))
2223 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode * rdir_vp)2224 shared_region_map_and_slide_setup(
2225 struct proc *p,
2226 uint32_t files_count,
2227 struct shared_file_np *files,
2228 uint32_t mappings_count,
2229 struct shared_file_mapping_slide_np *mappings,
2230 struct _sr_file_mappings **sr_file_mappings,
2231 struct vm_shared_region **shared_region_ptr,
2232 struct vnode *rdir_vp)
2233 {
2234 int error = 0;
2235 struct _sr_file_mappings *srfmp;
2236 uint32_t mappings_next;
2237 struct vnode_attr va;
2238 off_t fs;
2239 #if CONFIG_MACF
2240 vm_prot_t maxprot = VM_PROT_ALL;
2241 #endif
2242 uint32_t i;
2243 struct vm_shared_region *shared_region = NULL;
2244 boolean_t is_driverkit = task_is_driver(current_task());
2245
2246 SHARED_REGION_TRACE_DEBUG(
2247 ("shared_region: %p [%d(%s)] -> map\n",
2248 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2249 proc_getpid(p), p->p_comm));
2250
2251 if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2252 error = E2BIG;
2253 goto done;
2254 }
2255 if (files_count == 0) {
2256 error = EINVAL;
2257 goto done;
2258 }
2259 *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2260 Z_WAITOK | Z_ZERO);
2261 if (*sr_file_mappings == NULL) {
2262 error = ENOMEM;
2263 goto done;
2264 }
2265 mappings_next = 0;
2266 for (i = 0; i < files_count; i++) {
2267 srfmp = &(*sr_file_mappings)[i];
2268 srfmp->fd = files[i].sf_fd;
2269 srfmp->mappings_count = files[i].sf_mappings_count;
2270 srfmp->mappings = &mappings[mappings_next];
2271 mappings_next += srfmp->mappings_count;
2272 if (mappings_next > mappings_count) {
2273 error = EINVAL;
2274 goto done;
2275 }
2276 srfmp->slide = files[i].sf_slide;
2277 }
2278
2279 /* get the process's shared region (setup in vm_map_exec()) */
2280 shared_region = vm_shared_region_trim_and_get(current_task());
2281 *shared_region_ptr = shared_region;
2282 if (shared_region == NULL) {
2283 SHARED_REGION_TRACE_ERROR(
2284 ("shared_region: %p [%d(%s)] map(): "
2285 "no shared region\n",
2286 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2287 proc_getpid(p), p->p_comm));
2288 error = EINVAL;
2289 goto done;
2290 }
2291
2292 /*
2293 * Check the shared region matches the current root
2294 * directory of this process. Deny the mapping to
2295 * avoid tainting the shared region with something that
2296 * doesn't quite belong into it.
2297 */
2298 struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2299 if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2300 SHARED_REGION_TRACE_ERROR(
2301 ("shared_region: map(%p) root_dir mismatch\n",
2302 (void *)VM_KERNEL_ADDRPERM(current_thread())));
2303 error = EPERM;
2304 goto done;
2305 }
2306
2307
2308 for (srfmp = &(*sr_file_mappings)[0];
2309 srfmp < &(*sr_file_mappings)[files_count];
2310 srfmp++) {
2311 if (srfmp->mappings_count == 0) {
2312 /* no mappings here... */
2313 continue;
2314 }
2315
2316 /*
2317 * A file descriptor of -1 is used to indicate that the data
2318 * to be put in the shared region for this mapping comes directly
2319 * from the processes address space. Ensure we have proper alignments.
2320 */
2321 if (srfmp->fd == -1) {
2322 /* only allow one mapping per fd */
2323 if (srfmp->mappings_count > 1) {
2324 SHARED_REGION_TRACE_ERROR(
2325 ("shared_region: %p [%d(%s)] map data >1 mapping\n",
2326 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2327 proc_getpid(p), p->p_comm));
2328 error = EINVAL;
2329 goto done;
2330 }
2331
2332 /*
2333 * The destination address and size must be page aligned.
2334 */
2335 struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2336 mach_vm_address_t dest_addr = mapping->sms_address;
2337 mach_vm_size_t map_size = mapping->sms_size;
2338 if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2339 SHARED_REGION_TRACE_ERROR(
2340 ("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2341 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2342 proc_getpid(p), p->p_comm, dest_addr));
2343 error = EINVAL;
2344 goto done;
2345 }
2346 if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2347 SHARED_REGION_TRACE_ERROR(
2348 ("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2349 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2350 proc_getpid(p), p->p_comm, map_size));
2351 error = EINVAL;
2352 goto done;
2353 }
2354 continue;
2355 }
2356
2357 /* get file structure from file descriptor */
2358 error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2359 if (error) {
2360 SHARED_REGION_TRACE_ERROR(
2361 ("shared_region: %p [%d(%s)] map: "
2362 "fd=%d lookup failed (error=%d)\n",
2363 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2364 proc_getpid(p), p->p_comm, srfmp->fd, error));
2365 goto done;
2366 }
2367
2368 /* we need at least read permission on the file */
2369 if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2370 SHARED_REGION_TRACE_ERROR(
2371 ("shared_region: %p [%d(%s)] map: "
2372 "fd=%d not readable\n",
2373 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2374 proc_getpid(p), p->p_comm, srfmp->fd));
2375 error = EPERM;
2376 goto done;
2377 }
2378
2379 /* get vnode from file structure */
2380 error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2381 if (error) {
2382 SHARED_REGION_TRACE_ERROR(
2383 ("shared_region: %p [%d(%s)] map: "
2384 "fd=%d getwithref failed (error=%d)\n",
2385 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2386 proc_getpid(p), p->p_comm, srfmp->fd, error));
2387 goto done;
2388 }
2389 srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2390
2391 /* make sure the vnode is a regular file */
2392 if (srfmp->vp->v_type != VREG) {
2393 SHARED_REGION_TRACE_ERROR(
2394 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2395 "not a file (type=%d)\n",
2396 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2397 proc_getpid(p), p->p_comm,
2398 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2399 srfmp->vp->v_name, srfmp->vp->v_type));
2400 error = EINVAL;
2401 goto done;
2402 }
2403
2404 #if CONFIG_MACF
2405 /* pass in 0 for the offset argument because AMFI does not need the offset
2406 * of the shared cache */
2407 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2408 srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2409 if (error) {
2410 goto done;
2411 }
2412 #endif /* MAC */
2413
2414 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2415 /*
2416 * Check if the shared cache is in the trust cache;
2417 * if so, we can skip the root ownership check.
2418 */
2419 #if DEVELOPMENT || DEBUG
2420 /*
2421 * Skip both root ownership and trust cache check if
2422 * enforcement is disabled.
2423 */
2424 if (!cs_system_enforcement()) {
2425 goto after_root_check;
2426 }
2427 #endif /* DEVELOPMENT || DEBUG */
2428 struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2429 if (blob == NULL) {
2430 SHARED_REGION_TRACE_ERROR(
2431 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2432 "missing CS blob\n",
2433 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2434 proc_getpid(p), p->p_comm,
2435 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2436 srfmp->vp->v_name));
2437 goto root_check;
2438 }
2439 const uint8_t *cdhash = csblob_get_cdhash(blob);
2440 if (cdhash == NULL) {
2441 SHARED_REGION_TRACE_ERROR(
2442 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2443 "missing cdhash\n",
2444 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2445 proc_getpid(p), p->p_comm,
2446 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2447 srfmp->vp->v_name));
2448 goto root_check;
2449 }
2450
2451 bool in_trust_cache = false;
2452 TrustCacheQueryToken_t qt;
2453 if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2454 TCType_t tc_type = kTCTypeInvalid;
2455 TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2456 in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2457 (tc_type == kTCTypeCryptex1BootOS ||
2458 tc_type == kTCTypeStatic ||
2459 tc_type == kTCTypeEngineering));
2460 }
2461 if (!in_trust_cache) {
2462 SHARED_REGION_TRACE_ERROR(
2463 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2464 "not in trust cache\n",
2465 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2466 proc_getpid(p), p->p_comm,
2467 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2468 srfmp->vp->v_name));
2469 goto root_check;
2470 }
2471 goto after_root_check;
2472 root_check:
2473 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2474
2475 /* The shared cache file must be owned by root */
2476 VATTR_INIT(&va);
2477 VATTR_WANTED(&va, va_uid);
2478 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2479 if (error) {
2480 SHARED_REGION_TRACE_ERROR(
2481 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2482 "vnode_getattr(%p) failed (error=%d)\n",
2483 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2484 proc_getpid(p), p->p_comm,
2485 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2486 srfmp->vp->v_name,
2487 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2488 error));
2489 goto done;
2490 }
2491 if (va.va_uid != 0) {
2492 SHARED_REGION_TRACE_ERROR(
2493 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2494 "owned by uid=%d instead of 0\n",
2495 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2496 proc_getpid(p), p->p_comm,
2497 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2498 srfmp->vp->v_name, va.va_uid));
2499 error = EPERM;
2500 goto done;
2501 }
2502
2503 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2504 after_root_check:
2505 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2506
2507 #if CONFIG_CSR
2508 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2509 VATTR_INIT(&va);
2510 VATTR_WANTED(&va, va_flags);
2511 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2512 if (error) {
2513 SHARED_REGION_TRACE_ERROR(
2514 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2515 "vnode_getattr(%p) failed (error=%d)\n",
2516 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2517 proc_getpid(p), p->p_comm,
2518 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2519 srfmp->vp->v_name,
2520 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2521 error));
2522 goto done;
2523 }
2524
2525 if (!(va.va_flags & SF_RESTRICTED)) {
2526 /*
2527 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2528 * the shared cache file is NOT SIP-protected, so reject the
2529 * mapping request
2530 */
2531 SHARED_REGION_TRACE_ERROR(
2532 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
2533 "vnode is not SIP-protected. \n",
2534 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2535 proc_getpid(p), p->p_comm,
2536 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2537 srfmp->vp->v_name));
2538 error = EPERM;
2539 goto done;
2540 }
2541 }
2542 #else /* CONFIG_CSR */
2543
2544 /*
2545 * Devices without SIP/ROSP need to make sure that the shared cache
2546 * is either on the root volume or in the preboot cryptex volume.
2547 */
2548 assert(rdir_vp != NULL);
2549 if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2550 vnode_t preboot_vp = NULL;
2551 #if XNU_TARGET_OS_OSX
2552 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2553 #else
2554 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2555 #endif
2556 error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2557 if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2558 SHARED_REGION_TRACE_ERROR(
2559 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2560 "not on process' root volume nor preboot volume\n",
2561 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2562 proc_getpid(p), p->p_comm,
2563 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2564 srfmp->vp->v_name));
2565 error = EPERM;
2566 if (preboot_vp) {
2567 (void)vnode_put(preboot_vp);
2568 }
2569 goto done;
2570 } else if (preboot_vp) {
2571 (void)vnode_put(preboot_vp);
2572 }
2573 }
2574 #endif /* CONFIG_CSR */
2575
2576 if (scdir_enforce) {
2577 char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2578 struct vnode *scdir_vp = NULL;
2579 for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2580 *expected_scdir_path != NULL;
2581 expected_scdir_path++) {
2582 /* get vnode for expected_scdir_path */
2583 error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2584 if (error) {
2585 SHARED_REGION_TRACE_ERROR(
2586 ("shared_region: %p [%d(%s)]: "
2587 "vnode_lookup(%s) failed (error=%d)\n",
2588 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2589 proc_getpid(p), p->p_comm,
2590 *expected_scdir_path, error));
2591 continue;
2592 }
2593
2594 /* check if parent is scdir_vp */
2595 assert(scdir_vp != NULL);
2596 if (vnode_parent(srfmp->vp) == scdir_vp) {
2597 (void)vnode_put(scdir_vp);
2598 scdir_vp = NULL;
2599 goto scdir_ok;
2600 }
2601 (void)vnode_put(scdir_vp);
2602 scdir_vp = NULL;
2603 }
2604 /* nothing matches */
2605 SHARED_REGION_TRACE_ERROR(
2606 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2607 "shared cache file not in expected directory\n",
2608 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2609 proc_getpid(p), p->p_comm,
2610 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2611 srfmp->vp->v_name));
2612 error = EPERM;
2613 goto done;
2614 }
2615 scdir_ok:
2616
2617 /* get vnode size */
2618 error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2619 if (error) {
2620 SHARED_REGION_TRACE_ERROR(
2621 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2622 "vnode_size(%p) failed (error=%d)\n",
2623 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2624 proc_getpid(p), p->p_comm,
2625 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2626 srfmp->vp->v_name,
2627 (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2628 goto done;
2629 }
2630 srfmp->file_size = fs;
2631
2632 /* get the file's memory object handle */
2633 srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2634 if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2635 SHARED_REGION_TRACE_ERROR(
2636 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2637 "no memory object\n",
2638 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2639 proc_getpid(p), p->p_comm,
2640 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2641 srfmp->vp->v_name));
2642 error = EINVAL;
2643 goto done;
2644 }
2645
2646 /* check that the mappings are properly covered by code signatures */
2647 if (!cs_system_enforcement()) {
2648 /* code signing is not enforced: no need to check */
2649 } else {
2650 for (i = 0; i < srfmp->mappings_count; i++) {
2651 if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2652 /* zero-filled mapping: not backed by the file */
2653 continue;
2654 }
2655 if (ubc_cs_is_range_codesigned(srfmp->vp,
2656 srfmp->mappings[i].sms_file_offset,
2657 srfmp->mappings[i].sms_size)) {
2658 /* this mapping is fully covered by code signatures */
2659 continue;
2660 }
2661 SHARED_REGION_TRACE_ERROR(
2662 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2663 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2664 "is not code-signed\n",
2665 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2666 proc_getpid(p), p->p_comm,
2667 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2668 srfmp->vp->v_name,
2669 i, srfmp->mappings_count,
2670 srfmp->mappings[i].sms_address,
2671 srfmp->mappings[i].sms_size,
2672 srfmp->mappings[i].sms_file_offset,
2673 srfmp->mappings[i].sms_max_prot,
2674 srfmp->mappings[i].sms_init_prot));
2675 error = EINVAL;
2676 goto done;
2677 }
2678 }
2679 }
2680 done:
2681 if (error != 0) {
2682 shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2683 *sr_file_mappings = NULL;
2684 *shared_region_ptr = NULL;
2685 }
2686 return error;
2687 }
2688
2689 /*
2690 * shared_region_map_np()
2691 *
2692 * This system call is intended for dyld.
2693 *
2694 * dyld uses this to map a shared cache file into a shared region.
2695 * This is usually done only the first time a shared cache is needed.
2696 * Subsequent processes will just use the populated shared region without
2697 * requiring any further setup.
2698 */
2699 static int
_shared_region_map_and_slide(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings)2700 _shared_region_map_and_slide(
2701 struct proc *p,
2702 uint32_t files_count,
2703 struct shared_file_np *files,
2704 uint32_t mappings_count,
2705 struct shared_file_mapping_slide_np *mappings)
2706 {
2707 int error = 0;
2708 kern_return_t kr = KERN_SUCCESS;
2709 struct _sr_file_mappings *sr_file_mappings = NULL;
2710 struct vnode *rdir_vp = NULL;
2711 struct vm_shared_region *shared_region = NULL;
2712
2713 /*
2714 * Get a reference to the current proc's root dir.
2715 * Need this to prevent racing with chroot.
2716 */
2717 proc_fdlock(p);
2718 rdir_vp = p->p_fd.fd_rdir;
2719 if (rdir_vp == NULL) {
2720 rdir_vp = rootvnode;
2721 }
2722 assert(rdir_vp != NULL);
2723 vnode_get(rdir_vp);
2724 proc_fdunlock(p);
2725
2726 /*
2727 * Turn files, mappings into sr_file_mappings and other setup.
2728 */
2729 error = shared_region_map_and_slide_setup(p, files_count,
2730 files, mappings_count, mappings,
2731 &sr_file_mappings, &shared_region, rdir_vp);
2732 if (error != 0) {
2733 vnode_put(rdir_vp);
2734 return error;
2735 }
2736
2737 /* map the file(s) into that shared region's submap */
2738 kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2739 if (kr != KERN_SUCCESS) {
2740 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2741 "vm_shared_region_map_file() failed kr=0x%x\n",
2742 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2743 proc_getpid(p), p->p_comm, kr));
2744 }
2745
2746 /* convert kern_return_t to errno */
2747 switch (kr) {
2748 case KERN_SUCCESS:
2749 error = 0;
2750 break;
2751 case KERN_INVALID_ADDRESS:
2752 error = EFAULT;
2753 break;
2754 case KERN_PROTECTION_FAILURE:
2755 error = EPERM;
2756 break;
2757 case KERN_NO_SPACE:
2758 error = ENOMEM;
2759 break;
2760 case KERN_FAILURE:
2761 case KERN_INVALID_ARGUMENT:
2762 default:
2763 error = EINVAL;
2764 break;
2765 }
2766
2767 /*
2768 * Mark that this process is now using split libraries.
2769 */
2770 if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2771 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2772 }
2773
2774 vnode_put(rdir_vp);
2775 shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
2776
2777 SHARED_REGION_TRACE_DEBUG(
2778 ("shared_region: %p [%d(%s)] <- map\n",
2779 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2780 proc_getpid(p), p->p_comm));
2781
2782 return error;
2783 }
2784
2785 /*
2786 * Clean up part of _shared_region_map_and_slide()
2787 * It had to be broken out of _shared_region_map_and_slide() to
2788 * prevent compiler inlining from blowing out the stack.
2789 */
2790 __attribute__((noinline))
2791 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region)2792 shared_region_map_and_slide_cleanup(
2793 struct proc *p,
2794 uint32_t files_count,
2795 struct _sr_file_mappings *sr_file_mappings,
2796 struct vm_shared_region *shared_region)
2797 {
2798 struct _sr_file_mappings *srfmp;
2799 struct vnode_attr va;
2800
2801 if (sr_file_mappings != NULL) {
2802 for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2803 if (srfmp->vp != NULL) {
2804 vnode_lock_spin(srfmp->vp);
2805 srfmp->vp->v_flag |= VSHARED_DYLD;
2806 vnode_unlock(srfmp->vp);
2807
2808 /* update the vnode's access time */
2809 if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2810 VATTR_INIT(&va);
2811 nanotime(&va.va_access_time);
2812 VATTR_SET_ACTIVE(&va, va_access_time);
2813 vnode_setattr(srfmp->vp, &va, vfs_context_current());
2814 }
2815
2816 #if NAMEDSTREAMS
2817 /*
2818 * If the shared cache is compressed, it may
2819 * have a namedstream vnode instantiated for
2820 * for it. That namedstream vnode will also
2821 * have to be marked with VSHARED_DYLD.
2822 */
2823 if (vnode_hasnamedstreams(srfmp->vp)) {
2824 vnode_t svp;
2825 if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2826 NS_OPEN, 0, vfs_context_kernel()) == 0) {
2827 vnode_lock_spin(svp);
2828 svp->v_flag |= VSHARED_DYLD;
2829 vnode_unlock(svp);
2830 vnode_put(svp);
2831 }
2832 }
2833 #endif /* NAMEDSTREAMS */
2834 /*
2835 * release the vnode...
2836 * ubc_map() still holds it for us in the non-error case
2837 */
2838 (void) vnode_put(srfmp->vp);
2839 srfmp->vp = NULL;
2840 }
2841 if (srfmp->fp != NULL) {
2842 /* release the file descriptor */
2843 fp_drop(p, srfmp->fd, srfmp->fp, 0);
2844 srfmp->fp = NULL;
2845 }
2846 }
2847 kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2848 }
2849
2850 if (shared_region != NULL) {
2851 vm_shared_region_deallocate(shared_region);
2852 }
2853 }
2854
2855
2856 /*
2857 * For each file mapped, we may have mappings for:
2858 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2859 * so let's round up to 8 mappings per file.
2860 */
2861 #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
2862
2863 /*
2864 * This is the new interface for setting up shared region mappings.
2865 *
2866 * The slide used for shared regions setup using this interface is done differently
2867 * from the old interface. The slide value passed in the shared_files_np represents
2868 * a max value. The kernel will choose a random value based on that, then use it
2869 * for all shared regions.
2870 */
2871 #if defined (__x86_64__)
2872 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2873 #else
2874 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2875 #endif
2876
/*
 * shared_region_map_and_slide_2_np() system call entry point (dyld only).
 *
 * Copies in the caller's shared_file_np and mapping arrays, picks one
 * random slide (bounded by files[0].sf_slide), applies it to every
 * mapping, then hands off to _shared_region_map_and_slide().
 *
 * NOTE(review): "kr" mixes error domains — it holds KERN_* codes from
 * local failures, errno values from shared_region_copyin(), and errno
 * from _shared_region_map_and_slide(), and is returned directly as the
 * syscall result.  Callers appear to only test for non-zero; confirm
 * before relying on specific error values.
 */
int
shared_region_map_and_slide_2_np(
	struct proc *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int *retvalp)
{
	unsigned int files_count;
	struct shared_file_np *shared_files = NULL;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* validate the file count and allocate the kernel copy */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	/* validate the mapping count and allocate the kernel copy */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * files[0].sf_slide is the maximum slide requested by dyld;
	 * pick a page-aligned random slide below it.
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* kfree_data() is a no-op for NULL pointers */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
3000
3001 /*
3002 * A syscall for dyld to use to map data pages that need load time relocation fixups.
3003 * The fixups are performed by a custom pager during page-in, so the pages still appear
3004 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
3005 * on demand later, all w/o using the compressor.
3006 *
3007 * Note these page are treated as MAP_PRIVATE. So if the application dirties any pages while
3008 * running, they are COW'd as normal.
3009 */
3010 int
map_with_linking_np(struct proc * p,struct map_with_linking_np_args * uap,__unused int * retvalp)3011 map_with_linking_np(
3012 struct proc *p,
3013 struct map_with_linking_np_args *uap,
3014 __unused int *retvalp)
3015 {
3016 uint32_t region_count;
3017 uint32_t r;
3018 struct mwl_region *regions = NULL;
3019 struct mwl_region *rp;
3020 uint32_t link_info_size;
3021 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
3022 struct mwl_info_hdr *info_hdr = NULL;
3023 uint64_t binds_size;
3024 int fd;
3025 struct fileproc *fp = NULL;
3026 struct vnode *vp = NULL;
3027 size_t file_size;
3028 off_t fs;
3029 struct vnode_attr va;
3030 memory_object_control_t file_control = NULL;
3031 int error;
3032 kern_return_t kr = KERN_SUCCESS;
3033
3034 /*
3035 * Check if dyld has told us it finished with this call.
3036 */
3037 if (p->p_disallow_map_with_linking) {
3038 printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
3039 __func__, proc_getpid(p), p->p_comm);
3040 kr = KERN_FAILURE;
3041 goto done;
3042 }
3043
3044 /*
3045 * First we do some sanity checking on what dyld has passed us.
3046 */
3047 region_count = uap->region_count;
3048 link_info_size = uap->link_info_size;
3049 if (region_count == 0) {
3050 printf("%s: [%d(%s)]: region_count == 0\n",
3051 __func__, proc_getpid(p), p->p_comm);
3052 kr = KERN_FAILURE;
3053 goto done;
3054 }
3055 if (region_count > MWL_MAX_REGION_COUNT) {
3056 printf("%s: [%d(%s)]: region_count too big %d\n",
3057 __func__, proc_getpid(p), p->p_comm, region_count);
3058 kr = KERN_FAILURE;
3059 goto done;
3060 }
3061
3062 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3063 printf("%s: [%d(%s)]: link_info_size too small\n",
3064 __func__, proc_getpid(p), p->p_comm);
3065 kr = KERN_FAILURE;
3066 goto done;
3067 }
3068 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3069 printf("%s: [%d(%s)]: link_info_size too big %d\n",
3070 __func__, proc_getpid(p), p->p_comm, link_info_size);
3071 kr = KERN_FAILURE;
3072 goto done;
3073 }
3074
3075 /*
3076 * Allocate and copyin the regions and link info
3077 */
3078 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3079 if (regions == NULL) {
3080 printf("%s: [%d(%s)]: failed to allocate regions\n",
3081 __func__, proc_getpid(p), p->p_comm);
3082 kr = KERN_RESOURCE_SHORTAGE;
3083 goto done;
3084 }
3085 kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3086 if (kr != KERN_SUCCESS) {
3087 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3088 __func__, proc_getpid(p), p->p_comm, kr);
3089 goto done;
3090 }
3091
3092 link_info = kalloc_data(link_info_size, Z_WAITOK);
3093 if (link_info == NULL) {
3094 printf("%s: [%d(%s)]: failed to allocate link_info\n",
3095 __func__, proc_getpid(p), p->p_comm);
3096 kr = KERN_RESOURCE_SHORTAGE;
3097 goto done;
3098 }
3099 kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3100 if (kr != KERN_SUCCESS) {
3101 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3102 __func__, proc_getpid(p), p->p_comm, kr);
3103 goto done;
3104 }
3105
3106 /*
3107 * Do some verification the data structures.
3108 */
3109 info_hdr = (struct mwl_info_hdr *)link_info;
3110 if (info_hdr->mwli_version != MWL_INFO_VERS) {
3111 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3112 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3113 kr = KERN_FAILURE;
3114 goto done;
3115 }
3116
3117 if (info_hdr->mwli_binds_offset > link_info_size) {
3118 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3119 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3120 kr = KERN_FAILURE;
3121 goto done;
3122 }
3123
3124 /* some older devs have s/w page size > h/w page size, no need to support them */
3125 if (info_hdr->mwli_page_size != PAGE_SIZE) {
3126 /* no printf, since this is expected on some devices */
3127 kr = KERN_INVALID_ARGUMENT;
3128 goto done;
3129 }
3130
3131 binds_size = (uint64_t)info_hdr->mwli_binds_count *
3132 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3133 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3134 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3135 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3136 kr = KERN_FAILURE;
3137 goto done;
3138 }
3139
3140 if (info_hdr->mwli_chains_offset > link_info_size) {
3141 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3142 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3143 kr = KERN_FAILURE;
3144 goto done;
3145 }
3146
3147
3148 /*
3149 * Ensure the chained starts in the link info and make sure the
3150 * segment info offsets are within bounds.
3151 */
3152 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3153 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3154 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3155 kr = KERN_FAILURE;
3156 goto done;
3157 }
3158 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3159 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3160 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3161 kr = KERN_FAILURE;
3162 goto done;
3163 }
3164
3165 /* Note that more verification of offsets is done in the pager itself */
3166
3167 /*
3168 * Ensure we've only been given one FD and verify valid protections.
3169 */
3170 fd = regions[0].mwlr_fd;
3171 for (r = 0; r < region_count; ++r) {
3172 if (regions[r].mwlr_fd != fd) {
3173 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3174 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3175 kr = KERN_FAILURE;
3176 goto done;
3177 }
3178
3179 /*
3180 * Only allow data mappings and not zero fill. Permit TPRO
3181 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
3182 */
3183 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3184 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3185 __func__, proc_getpid(p), p->p_comm);
3186 kr = KERN_FAILURE;
3187 goto done;
3188 }
3189 if (regions[r].mwlr_protections & VM_PROT_ZF) {
3190 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
3191 __func__, proc_getpid(p), p->p_comm, r);
3192 kr = KERN_FAILURE;
3193 goto done;
3194 }
3195 if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
3196 !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
3197 printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
3198 __func__, proc_getpid(p), p->p_comm, r);
3199 kr = KERN_FAILURE;
3200 goto done;
3201 }
3202 }
3203
3204
3205 /* get file structure from file descriptor */
3206 error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3207 if (error) {
3208 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3209 __func__, proc_getpid(p), p->p_comm, error);
3210 kr = KERN_FAILURE;
3211 goto done;
3212 }
3213
3214 /* We need at least read permission on the file */
3215 if (!(fp->fp_glob->fg_flag & FREAD)) {
3216 printf("%s: [%d(%s)]: not readable\n",
3217 __func__, proc_getpid(p), p->p_comm);
3218 kr = KERN_FAILURE;
3219 goto done;
3220 }
3221
3222 /* Get the vnode from file structure */
3223 vp = (struct vnode *)fp_get_data(fp);
3224 error = vnode_getwithref(vp);
3225 if (error) {
3226 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3227 __func__, proc_getpid(p), p->p_comm, error);
3228 kr = KERN_FAILURE;
3229 vp = NULL; /* just to be sure */
3230 goto done;
3231 }
3232
3233 /* Make sure the vnode is a regular file */
3234 if (vp->v_type != VREG) {
3235 printf("%s: [%d(%s)]: vnode not VREG\n",
3236 __func__, proc_getpid(p), p->p_comm);
3237 kr = KERN_FAILURE;
3238 goto done;
3239 }
3240
3241 /* get vnode size */
3242 error = vnode_size(vp, &fs, vfs_context_current());
3243 if (error) {
3244 goto done;
3245 }
3246 file_size = fs;
3247
3248 /* get the file's memory object handle */
3249 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3250 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3251 printf("%s: [%d(%s)]: no memory object\n",
3252 __func__, proc_getpid(p), p->p_comm);
3253 kr = KERN_FAILURE;
3254 goto done;
3255 }
3256
3257 for (r = 0; r < region_count; ++r) {
3258 rp = ®ions[r];
3259
3260 #if CONFIG_MACF
3261 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3262 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
3263 fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3264 if (error) {
3265 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3266 __func__, proc_getpid(p), p->p_comm, r, error);
3267 kr = KERN_FAILURE;
3268 goto done;
3269 }
3270 #endif /* MAC */
3271
3272 /* check that the mappings are properly covered by code signatures */
3273 if (cs_system_enforcement()) {
3274 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3275 printf("%s: [%d(%s)]: region %d, not code signed\n",
3276 __func__, proc_getpid(p), p->p_comm, r);
3277 kr = KERN_FAILURE;
3278 goto done;
3279 }
3280 }
3281 }
3282
3283 /* update the vnode's access time */
3284 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3285 VATTR_INIT(&va);
3286 nanotime(&va.va_access_time);
3287 VATTR_SET_ACTIVE(&va, va_access_time);
3288 vnode_setattr(vp, &va, vfs_context_current());
3289 }
3290
3291 /* get the VM to do the work */
3292 kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3293
3294 done:
3295 if (fp != NULL) {
3296 /* release the file descriptor */
3297 fp_drop(p, fd, fp, 0);
3298 }
3299 if (vp != NULL) {
3300 (void)vnode_put(vp);
3301 }
3302 if (regions != NULL) {
3303 kfree_data(regions, region_count * sizeof(regions[0]));
3304 }
3305 /* link info is used in the pager if things worked */
3306 if (link_info != NULL && kr != KERN_SUCCESS) {
3307 kfree_data(link_info, link_info_size);
3308 }
3309
3310 switch (kr) {
3311 case KERN_SUCCESS:
3312 return 0;
3313 case KERN_RESOURCE_SHORTAGE:
3314 return ENOMEM;
3315 default:
3316 return EINVAL;
3317 }
3318 }
3319
3320 #if DEBUG || DEVELOPMENT
3321 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
3322 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
3323 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
3324 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
3325 #endif /* DEBUG || DEVELOPMENT */
3326
3327 /* sysctl overflow room */
3328
3329 SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
3330 (int *) &page_size, 0, "vm page size");
3331
3332 /* vm_page_free_target is provided as a makeshift solution for applications that want to
3333 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
3334 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
3335 extern unsigned int vm_page_free_target;
3336 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
3337 &vm_page_free_target, 0, "Pageout daemon free target");
3338
3339 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
3340 &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3341
3342 static int
3343 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3344 {
3345 #pragma unused(oidp, arg1, arg2)
3346 unsigned int page_free_wanted;
3347
3348 page_free_wanted = mach_vm_ctl_page_free_wanted();
3349 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3350 }
3351 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
3352 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3353 0, 0, vm_ctl_page_free_wanted, "I", "");
3354
3355 extern unsigned int vm_page_purgeable_count;
3356 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3357 &vm_page_purgeable_count, 0, "Purgeable page count");
3358
3359 extern unsigned int vm_page_purgeable_wired_count;
3360 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3361 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
3362
3363 extern unsigned int vm_page_kern_lpage_count;
3364 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3365 &vm_page_kern_lpage_count, 0, "kernel used large pages");
3366
3367 #if DEVELOPMENT || DEBUG
3368 #if __ARM_MIXED_PAGE_SIZE__
3369 static int vm_mixed_pagesize_supported = 1;
3370 #else
3371 static int vm_mixed_pagesize_supported = 0;
3372 #endif /*__ARM_MIXED_PAGE_SIZE__ */
3373 SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
3374 &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
3375
3376 SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
3377 SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
3378 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
3379 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
3380
3381 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
3382 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
3383 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
3384 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
3385 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
3386 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
3387
3388 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3389 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
3390 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3391 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
3392 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3393 &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
3394 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3395 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
3396 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3397 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
3398 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3399 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
3400 #endif /* DEVELOPMENT || DEBUG */
3401
3402 extern int madvise_free_debug;
3403 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
3404 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
3405 extern int madvise_free_debug_sometimes;
3406 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
3407 &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
3408
3409 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3410 &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
3411 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3412 &vm_page_stats_reusable.reusable_pages_success, "");
3413 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3414 &vm_page_stats_reusable.reusable_pages_failure, "");
3415 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3416 &vm_page_stats_reusable.reusable_pages_shared, "");
3417 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3418 &vm_page_stats_reusable.all_reusable_calls, "");
3419 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3420 &vm_page_stats_reusable.partial_reusable_calls, "");
3421 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3422 &vm_page_stats_reusable.reuse_pages_success, "");
3423 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3424 &vm_page_stats_reusable.reuse_pages_failure, "");
3425 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3426 &vm_page_stats_reusable.all_reuse_calls, "");
3427 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3428 &vm_page_stats_reusable.partial_reuse_calls, "");
3429 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3430 &vm_page_stats_reusable.can_reuse_success, "");
3431 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3432 &vm_page_stats_reusable.can_reuse_failure, "");
3433 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
3434 &vm_page_stats_reusable.reusable_reclaimed, "");
3435 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
3436 &vm_page_stats_reusable.reusable_nonwritable, "");
3437 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3438 &vm_page_stats_reusable.reusable_shared, "");
3439 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3440 &vm_page_stats_reusable.free_shared, "");
3441
3442
3443 extern unsigned int vm_page_free_count, vm_page_speculative_count;
3444 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
3445 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
3446
3447 extern unsigned int vm_page_cleaned_count;
3448 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
3449
3450 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
3451 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
3452 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
3453
3454 /* pageout counts */
3455 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
3456 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
3457
3458 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
3459 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
3460 SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3461 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
3462 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3463 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
3464
3465 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
3466 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
3467 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
3468 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
3469 extern unsigned int vm_page_realtime_count;
3470 SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
3471 extern int vm_pageout_protect_realtime;
3472 SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");
3473
3474 /* counts of pages prefaulted when entering a memory object */
3475 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
3476 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
3477 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3478
3479 #if defined (__x86_64__)
3480 extern unsigned int vm_clump_promote_threshold;
3481 SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
3482 #if DEVELOPMENT || DEBUG
3483 extern unsigned long vm_clump_stats[];
3484 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
3485 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
3486 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
3487 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
3488 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
3489 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
3490 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
3491 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
3492 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
3493 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
3494 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
3495 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
3496 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
3497 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
3498 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
3499 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
3500 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
3501 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
3502 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
3503 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
3504 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
3505 #endif /* if DEVELOPMENT || DEBUG */
3506 #endif /* #if defined (__x86_64__) */
3507
3508 #if CONFIG_SECLUDED_MEMORY
3509
3510 SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
3511 extern unsigned int vm_page_secluded_target;
3512 extern unsigned int vm_page_secluded_count;
3513 extern unsigned int vm_page_secluded_count_free;
3514 extern unsigned int vm_page_secluded_count_inuse;
3515 extern unsigned int vm_page_secluded_count_over_target;
3516 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
3517 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
3518 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
3519 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
3520 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
3521
3522 extern struct vm_page_secluded_data vm_page_secluded;
3523 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
3524 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
3525 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
3526 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
3527 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
3528 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
3529 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
3530 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
3531 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
3532
3533 #endif /* CONFIG_SECLUDED_MEMORY */
3534
3535 #pragma mark Deferred Reclaim
3536
3537 #if CONFIG_DEFERRED_RECLAIM
3538
3539 #if DEVELOPMENT || DEBUG
3540 /*
3541 * VM reclaim testing
3542 */
3543 extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
3544
3545 static int
3546 sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
3547 {
3548 #pragma unused(arg1, arg2)
3549 int error = EINVAL, pid = 0;
3550 /*
3551 * Only send on write
3552 */
3553 error = sysctl_handle_int(oidp, &pid, 0, req);
3554 if (error || !req->newptr) {
3555 return error;
3556 }
3557
3558 bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
3559 if (success) {
3560 error = 0;
3561 }
3562
3563 return error;
3564 }
3565
3566 SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
3567 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
3568 &sysctl_vm_reclaim_drain_async_queue, "I", "");
3569
3570
3571 extern uint64_t vm_reclaim_max_threshold;
3572 extern uint64_t vm_reclaim_trim_divisor;
3573
3574 SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
3575 SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
3576 #endif /* DEVELOPMENT || DEBUG */
3577
3578 #endif /* CONFIG_DEFERRED_RECLAIM */
3579
3580 #include <kern/thread.h>
3581 #include <sys/user.h>
3582
3583 void vm_pageout_io_throttle(void);
3584
3585 void
vm_pageout_io_throttle(void)3586 vm_pageout_io_throttle(void)
3587 {
3588 struct uthread *uthread = current_uthread();
3589
3590 /*
3591 * thread is marked as a low priority I/O type
3592 * and the I/O we issued while in this cleaning operation
3593 * collided with normal I/O operations... we'll
3594 * delay in order to mitigate the impact of this
3595 * task on the normal operation of the system
3596 */
3597
3598 if (uthread->uu_lowpri_window) {
3599 throttle_lowpri_io(1);
3600 }
3601 }
3602
3603 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3604 vm_pressure_monitor(
3605 __unused struct proc *p,
3606 struct vm_pressure_monitor_args *uap,
3607 int *retval)
3608 {
3609 kern_return_t kr;
3610 uint32_t pages_reclaimed;
3611 uint32_t pages_wanted;
3612
3613 kr = mach_vm_pressure_monitor(
3614 (boolean_t) uap->wait_for_pressure,
3615 uap->nsecs_monitored,
3616 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3617 &pages_wanted);
3618
3619 switch (kr) {
3620 case KERN_SUCCESS:
3621 break;
3622 case KERN_ABORTED:
3623 return EINTR;
3624 default:
3625 return EINVAL;
3626 }
3627
3628 if (uap->pages_reclaimed) {
3629 if (copyout((void *)&pages_reclaimed,
3630 uap->pages_reclaimed,
3631 sizeof(pages_reclaimed)) != 0) {
3632 return EFAULT;
3633 }
3634 }
3635
3636 *retval = (int) pages_wanted;
3637 return 0;
3638 }
3639
3640 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3641 kas_info(struct proc *p,
3642 struct kas_info_args *uap,
3643 int *retval __unused)
3644 {
3645 #ifndef CONFIG_KAS_INFO
3646 (void)p;
3647 (void)uap;
3648 return ENOTSUP;
3649 #else /* CONFIG_KAS_INFO */
3650 int selector = uap->selector;
3651 user_addr_t valuep = uap->value;
3652 user_addr_t sizep = uap->size;
3653 user_size_t size, rsize;
3654 int error;
3655
3656 if (!kauth_cred_issuser(kauth_cred_get())) {
3657 return EPERM;
3658 }
3659
3660 #if CONFIG_MACF
3661 error = mac_system_check_kas_info(kauth_cred_get(), selector);
3662 if (error) {
3663 return error;
3664 }
3665 #endif
3666
3667 if (IS_64BIT_PROCESS(p)) {
3668 user64_size_t size64;
3669 error = copyin(sizep, &size64, sizeof(size64));
3670 size = (user_size_t)size64;
3671 } else {
3672 user32_size_t size32;
3673 error = copyin(sizep, &size32, sizeof(size32));
3674 size = (user_size_t)size32;
3675 }
3676 if (error) {
3677 return error;
3678 }
3679
3680 switch (selector) {
3681 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3682 {
3683 uint64_t slide = vm_kernel_slide;
3684
3685 if (sizeof(slide) != size) {
3686 return EINVAL;
3687 }
3688
3689 error = copyout(&slide, valuep, sizeof(slide));
3690 if (error) {
3691 return error;
3692 }
3693 rsize = size;
3694 }
3695 break;
3696 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3697 {
3698 uint32_t i;
3699 kernel_mach_header_t *mh = &_mh_execute_header;
3700 struct load_command *cmd;
3701 cmd = (struct load_command*) &mh[1];
3702 uint64_t *bases;
3703 rsize = mh->ncmds * sizeof(uint64_t);
3704
3705 /*
3706 * Return the size if no data was passed
3707 */
3708 if (valuep == 0) {
3709 break;
3710 }
3711
3712 if (rsize > size) {
3713 return EINVAL;
3714 }
3715
3716 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3717
3718 for (i = 0; i < mh->ncmds; i++) {
3719 if (cmd->cmd == LC_SEGMENT_KERNEL) {
3720 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3721 bases[i] = (uint64_t)sg->vmaddr;
3722 }
3723 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3724 }
3725
3726 error = copyout(bases, valuep, rsize);
3727
3728 kfree_data(bases, rsize);
3729
3730 if (error) {
3731 return error;
3732 }
3733 }
3734 break;
3735 default:
3736 return EINVAL;
3737 }
3738
3739 if (IS_64BIT_PROCESS(p)) {
3740 user64_size_t size64 = (user64_size_t)rsize;
3741 error = copyout(&size64, sizep, sizeof(size64));
3742 } else {
3743 user32_size_t size32 = (user32_size_t)rsize;
3744 error = copyout(&size32, sizep, sizeof(size32));
3745 }
3746
3747 return error;
3748 #endif /* CONFIG_KAS_INFO */
3749 }
3750
3751 #if __has_feature(ptrauth_calls)
3752 /*
3753 * Generate a random pointer signing key that isn't 0.
3754 */
3755 uint64_t
generate_jop_key(void)3756 generate_jop_key(void)
3757 {
3758 uint64_t key;
3759
3760 do {
3761 read_random(&key, sizeof key);
3762 } while (key == 0);
3763 return key;
3764 }
3765 #endif /* __has_feature(ptrauth_calls) */
3766
3767
3768 #pragma clang diagnostic push
3769 #pragma clang diagnostic ignored "-Wcast-qual"
3770 #pragma clang diagnostic ignored "-Wunused-function"
3771
3772 static void
asserts()3773 asserts()
3774 {
3775 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3776 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3777 }
3778
3779 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
3780 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
3781 #pragma clang diagnostic pop
3782
3783 extern uint32_t vm_page_pages;
3784 SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
3785
3786 extern uint32_t vm_page_busy_absent_skipped;
3787 SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
3788
3789 extern uint32_t vm_page_upl_tainted;
3790 SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
3791
3792 extern uint32_t vm_page_iopl_tainted;
3793 SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3794
3795 #if __arm64__ && (DEVELOPMENT || DEBUG)
3796 extern int vm_footprint_suspend_allowed;
3797 SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
3798
3799 extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3800 static int
3801 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3802 {
3803 #pragma unused(oidp, arg1, arg2)
3804 int error = 0;
3805 int new_value;
3806
3807 if (req->newptr == USER_ADDR_NULL) {
3808 return 0;
3809 }
3810 error = SYSCTL_IN(req, &new_value, sizeof(int));
3811 if (error) {
3812 return error;
3813 }
3814 if (!vm_footprint_suspend_allowed) {
3815 if (new_value != 0) {
3816 /* suspends are not allowed... */
3817 return 0;
3818 }
3819 /* ... but let resumes proceed */
3820 }
3821 DTRACE_VM2(footprint_suspend,
3822 vm_map_t, current_map(),
3823 int, new_value);
3824
3825 pmap_footprint_suspend(current_map(), new_value);
3826
3827 return 0;
3828 }
3829 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
3830 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3831 0, 0, &sysctl_vm_footprint_suspend, "I", "");
3832 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3833
/*
 * Read-only statistics for the "corpse footprint" buffers collected when a
 * task corpse is generated: number collected, average/max sizes, and how
 * often collection hit a full buffer or failed to get one.
 */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3849
#if CODE_SIGNING_MONITOR
/*
 * Read-only counters: per their names, how often code-signing work was
 * (resp. was not) deferred to the code-signing monitor; maintained in the
 * VM code-signing path (not visible in this file).
 */
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */
3858
/* Read-only shared-region pager activity counters. */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
/*
 * Tunable delay before destroying a no-longer-used shared region
 * (RW; units not visible here — presumably seconds, confirm at definition).
 */
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
/* Per its name: slack allowed before a pmap ledger check panics (debug builds). */
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
3879
3880
/*
 * Read-only counters/sizes/maxima for the copy strategies exercised by
 * vm_map_lookup_and_lock_object(): "copy slowly", "copy strategically",
 * and shadow-object copies (plus restart/error tallies for the first two).
 */
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3920
/* RW policy toggle; enforcement lives in the VM mapping code, not here. */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
/* Read-only counter; per its name, tallies "copy on read" operations. */
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
3927
/* Shared-region bookkeeping: current count and historical peak (read-only). */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
/* Development-only pager statistics: resident page counts and pager count. */
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* Pointer-authentication targets only: key count and re-slide count. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */
3953
#if MACH_ASSERT
/* Debug-build knobs for the "debug4k" (4K-page debugging) machinery. */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3964
/*
 * Read-only counters: how often map size/data limits were set, and how often
 * vm_map_enter hit RLIMIT_AS / RLIMIT_DATA.
 */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
3973
/*
 * Read-only counters tracing the stages of the "resilient media" fault path
 * (initiate / retry / proceed / release and two abort points).
 */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/*
 * Debug-build error-injection knobs: each "*_rate" is RW (exact rate
 * semantics live at the injection site — not visible here); the companion
 * counter of injected errors is read-only.
 */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
4003
4004 /*
4005 * A sysctl which causes all existing shared regions to become stale. They
4006 * will no longer be used by anything new and will be torn down as soon as
4007 * the last existing user exits. A write of non-zero value causes that to happen.
4008 * This should only be used by launchd, so we check that this is initproc.
4009 */
4010 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4011 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4012 {
4013 unsigned int value = 0;
4014 int changed = 0;
4015 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
4016 if (error || !changed) {
4017 return error;
4018 }
4019 if (current_proc() != initproc) {
4020 return EPERM;
4021 }
4022
4023 vm_shared_region_pivot();
4024
4025 return 0;
4026 }
4027
/* vm.shared_region_pivot: write-only; the handler restricts use to initproc. */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");
4031
/* Read-only tallies of vm_object shadow operations forced vs. skipped. */
extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

/* vmtc_total is declared elsewhere (text-page corruption healing code). */
SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
4041
4042
4043 #if DEBUG || DEVELOPMENT
4044 /*
4045 * A sysctl that can be used to corrupt a text page with an illegal instruction.
4046 * Used for testing text page self healing.
4047 */
4048 extern kern_return_t vm_corrupt_text_addr(uintptr_t);
4049 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)4050 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4051 {
4052 uint64_t value = 0;
4053 int error = sysctl_handle_quad(oidp, &value, 0, req);
4054 if (error || !req->newptr) {
4055 return error;
4056 }
4057
4058 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
4059 return 0;
4060 } else {
4061 return EINVAL;
4062 }
4063 }
4064
/* vm.corrupt_text_addr: write-only, maskable; debug/development builds only. */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
4069
4070 #if CONFIG_MAP_RANGES
4071 /*
4072 * vm.malloc_ranges
4073 *
4074 * space-separated list of <left:right> hexadecimal addresses.
4075 */
4076 static int
4077 vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
4078 {
4079 vm_map_t map = current_map();
4080 struct mach_vm_range r1, r2;
4081 char str[20 * 4];
4082 int len;
4083
4084 if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
4085 return ENOENT;
4086 }
4087 if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
4088 return ENOENT;
4089 }
4090
4091 len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
4092 r1.max_address, r2.min_address,
4093 r2.max_address, get_map_max(map));
4094
4095 return SYSCTL_OUT(req, str, len);
4096 }
4097
/* vm.malloc_ranges: read-only string, maskable. */
SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &vm_map_malloc_ranges, "A", "");
4101
4102 #if DEBUG || DEVELOPMENT
4103 static int
4104 vm_map_user_range_default SYSCTL_HANDLER_ARGS
4105 {
4106 #pragma unused(arg1, arg2, oidp)
4107 struct mach_vm_range range;
4108
4109 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4110 != KERN_SUCCESS) {
4111 return EINVAL;
4112 }
4113
4114 return SYSCTL_OUT(req, &range, sizeof(range));
4115 }
4116
4117 static int
4118 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4119 {
4120 #pragma unused(arg1, arg2, oidp)
4121 struct mach_vm_range range;
4122
4123 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4124 != KERN_SUCCESS) {
4125 return EINVAL;
4126 }
4127
4128 return SYSCTL_OUT(req, &range, sizeof(range));
4129 }
4130
4131 /*
4132 * A sysctl that can be used to return ranges for the current VM map.
4133 * Used for testing VM ranges.
4134 */
4135 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4136 0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
4137 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4138 0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
4139
4140 #endif /* DEBUG || DEVELOPMENT */
4141 #endif /* CONFIG_MAP_RANGES */
4142
4143 #if DEBUG || DEVELOPMENT
4144 #endif /* DEBUG || DEVELOPMENT */
4145
/* Count of rejected overflowing vm_map ranges, plus an RW logging toggle. */
extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
/*
 * NOTE(review): the sysctl name below spells "oveflows" (missing 'r').
 * The name is userland-visible ABI, so renaming it silently would break any
 * existing consumer; fix only together with a compatibility alias.
 */
SYSCTL_INT(_vm, OID_AUTO, map_range_oveflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");

/*
 * Compressor segment-fill contention stats: counts with/without contention
 * and the worst observed contention time (seconds + nanoseconds).
 */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/*
 * Compressor "major compaction" instrumentation (Apple-silicon macOS only):
 * two RW tunables (report threshold, yield cadence) and read-only stats
 * (report count, worst duration in sec+nsec, peak segment count).
 */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
4173
#if DEVELOPMENT || DEBUG
/* RW, any-user knob: per its name, panic when a vm_object is found not alive. */
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* RW, any-user knob used by VM debug assertions to suppress panics. */
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* MACH_ASSERT */
4183
4184