1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #if NECP
86 #include <net/necp.h>
87 #endif /* NECP */
88 #if SKYWALK
89 #include <skywalk/os_channel.h>
90 #endif /* SKYWALK */
91
92 #include <security/audit/audit.h>
93 #include <security/mac.h>
94 #include <bsm/audit_kevents.h>
95
96 #include <kern/kalloc.h>
97 #include <vm/vm_map.h>
98 #include <vm/vm_kern.h>
99 #include <vm/vm_pageout.h>
100
101 #include <mach/shared_region.h>
102 #include <vm/vm_shared_region.h>
103
104 #include <vm/vm_protos.h>
105
106 #include <sys/kern_memorystatus.h>
107 #include <sys/kern_memorystatus_freeze.h>
108 #include <sys/proc_internal.h>
109
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113
114 #include <kern/bits.h>
115
116 #if CONFIG_CSR
117 #include <sys/csr.h>
118 #endif /* CONFIG_CSR */
119 #include <IOKit/IOBSD.h>
120
#if VM_MAP_DEBUG_APPLE_PROTECT
/* Debug toggle for "apple protect" (encrypted executable) pager/map handling. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
/* Debug toggle for 4K-page ("fourk") map emulation. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
128
129 #if DEVELOPMENT || DEBUG
130
131 static int
132 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
133 {
134 #pragma unused(arg1, arg2)
135 vm_offset_t kaddr;
136 kern_return_t kr;
137 int error = 0;
138 int size = 0;
139
140 error = sysctl_handle_int(oidp, &size, 0, req);
141 if (error || !req->newptr) {
142 return error;
143 }
144
145 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT);
146
147 if (kr == KERN_SUCCESS) {
148 kmem_free(kernel_map, kaddr, size);
149 }
150
151 return error;
152 }
153
SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");

/* presumably switches vm_region() to footprint accounting when set — verify in osfmk/vm */
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");

#endif /* DEVELOPMENT || DEBUG */
161
162 static int
163 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
164 {
165 #pragma unused(arg1, arg2, oidp)
166 int error = 0;
167 int value;
168
169 value = task_self_region_footprint();
170 error = SYSCTL_OUT(req, &value, sizeof(int));
171 if (error) {
172 return error;
173 }
174
175 if (!req->newptr) {
176 return 0;
177 }
178
179 error = SYSCTL_IN(req, &value, sizeof(int));
180 if (error) {
181 return error;
182 }
183 task_self_region_footprint_set(value);
184 return 0;
185 }
186 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
187
/*
 * sysctl vm.self_region_page_size: report and optionally override the
 * page size used when describing VM regions to the calling thread.
 * Accepted values: 0 (presumably "reset to default" — confirm against
 * thread_self_region_page_shift_set), 4096, or 16384.
 */
static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	/* report the current effective page size for this thread */
	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	/* only 0 (reset), 4K and 16K are meaningful page sizes here */
	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	/* without mixed-page-size support, only the map's native size is legal */
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	/* store the page *shift*; bit_first(0) encodes the "reset" case */
	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
224
225
#if DEVELOPMENT || DEBUG
/* When set, panic instead of just failing when unsigned code tries to execute. */
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
#endif /* DEVELOPMENT || DEBUG */

/* Read-only counters: occurrences of UPL creation / wiring on executable memory. */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

/* Counters and cache limit for the "apple protect" (encrypted binary) pager. */
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
242
#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

/* Verbose logging from the Mach-O loader. */
extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm__ || __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
#if DEVELOPMENT || DEBUG
/* pmap ASID (address space ID) allocator statistics. */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm__ || __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */
294
/* Read-only counters for vm_object collapse / copy-on-write events (vm_counters). */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;   /* Disabled by default */
extern int proc_shadow_max(void);
/*
 * sysctl vm.vm_shadow_max: report the longest vm_object shadow chain
 * found for the current process.  The scan only runs when
 * vm.vm_shadow_max_enabled is set; otherwise 0 is reported.
 */
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled) {
		value = proc_shadow_max();
	}

	return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
327
/* Forward declaration; defined below.  Kept noinline so the wait is visible in stackshots. */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */
341
/*
 * Human-readable names for VM protection combinations, indexed by
 * (prot & VM_PROT_ALL): bit 0 = read, bit 1 = write, bit 2 = execute.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
352
353 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)354 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
355 {
356 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
357 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
358 }
359
360 /*
361 * shared_region_unnest_logging: level of logging of unnesting events
362 * 0 - no logging
363 * 1 - throttled logging of unexpected unnesting events (default)
364 * 2 - unthrottled logging of unexpected unnesting events
365 * 3+ - unthrottled logging of all unnesting events
366 */
367 int shared_region_unnest_logging = 1;
368
369 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
370 &shared_region_unnest_logging, 0, "");
371
372 int vm_shared_region_unnest_log_interval = 10;
373 int shared_region_unnest_log_count_threshold = 5;
374
375 /*
376 * Shared cache path enforcement.
377 */
378
379 #if XNU_TARGET_OS_OSX
380
381 #if defined (__x86_64__)
382 static int scdir_enforce = 1;
383 #else /* defined (__x86_64__) */
384 static int scdir_enforce = 0; /* AOT caches live elsewhere */
385 #endif /* defined (__x86_64__) */
386
387 static char scdir_path[] = "/System/Library/dyld/";
388
389 #else /* XNU_TARGET_OS_OSX */
390
391 static int scdir_enforce = 0;
392 static char scdir_path[] = "/System/Library/Caches/com.apple.dyld/";
393
394 #endif /* XNU_TARGET_OS_OSX */
395
396 static char driverkit_scdir_path[] = "/System/DriverKit/System/Library/dyld/";
397
398 #ifndef SECURE_KERNEL
399 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
400 {
401 #if CONFIG_CSR
402 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
403 printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
404 return EPERM;
405 }
406 #endif /* CONFIG_CSR */
407 return sysctl_handle_int(oidp, arg1, arg2, req);
408 }
409
410 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
411 #endif
412
/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;        /* seconds; start of the current logging interval */
static int shared_region_unnest_log_count = 0;  /* messages emitted in the current interval */
418
/*
 * Log (possibly throttled) that a process triggered unnesting of a
 * shared-region mapping in map 'm' over [s, e).  See the
 * shared_region_unnest_logging level description above.
 * 'is_nested_map' and 'lowest_unnestable_addr' identify expected
 * unnest events (writable entries), which levels 1-2 suppress.
 */
void
log_unnest_badness(
	vm_map_t m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval tv;

	if (shared_region_unnest_logging == 0) {
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	if (shared_region_unnest_logging <= 1) {
		/* level 1: throttle to a few messages per interval */
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			/* interval elapsed: reset the throttle window */
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
463
464 int
useracc(user_addr_t addr,user_size_t len,int prot)465 useracc(
466 user_addr_t addr,
467 user_size_t len,
468 int prot)
469 {
470 vm_map_t map;
471
472 map = current_map();
473 return vm_map_check_protection(
474 map,
475 vm_map_trunc_page(addr,
476 vm_map_page_mask(map)),
477 vm_map_round_page(addr + len,
478 vm_map_page_mask(map)),
479 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
480 }
481
482 int
vslock(user_addr_t addr,user_size_t len)483 vslock(
484 user_addr_t addr,
485 user_size_t len)
486 {
487 kern_return_t kret;
488 vm_map_t map;
489
490 map = current_map();
491 kret = vm_map_wire_kernel(map,
492 vm_map_trunc_page(addr,
493 vm_map_page_mask(map)),
494 vm_map_round_page(addr + len,
495 vm_map_page_mask(map)),
496 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
497 FALSE);
498
499 switch (kret) {
500 case KERN_SUCCESS:
501 return 0;
502 case KERN_INVALID_ADDRESS:
503 case KERN_NO_SPACE:
504 return ENOMEM;
505 case KERN_PROTECTION_FAILURE:
506 return EACCES;
507 default:
508 return EINVAL;
509 }
510 }
511
/*
 * vsunlock: undo a vslock() by unwiring [addr, addr+len) in the current
 * map.  The 'dirtied' hint is currently unused (the FIXME block that
 * would mark pages modified is compiled out).
 * Returns 0 on success or a BSD errno translated from the Mach status.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	/* dead code: would walk the range and mark each page modified */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* translate the Mach result into a BSD errno */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
562
563 int
subyte(user_addr_t addr,int byte)564 subyte(
565 user_addr_t addr,
566 int byte)
567 {
568 char character;
569
570 character = (char)byte;
571 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
572 }
573
574 int
suibyte(user_addr_t addr,int byte)575 suibyte(
576 user_addr_t addr,
577 int byte)
578 {
579 char character;
580
581 character = (char)byte;
582 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
583 }
584
585 int
fubyte(user_addr_t addr)586 fubyte(user_addr_t addr)
587 {
588 unsigned char byte;
589
590 if (copyin(addr, (void *) &byte, sizeof(char))) {
591 return -1;
592 }
593 return byte;
594 }
595
596 int
fuibyte(user_addr_t addr)597 fuibyte(user_addr_t addr)
598 {
599 unsigned char byte;
600
601 if (copyin(addr, (void *) &(byte), sizeof(char))) {
602 return -1;
603 }
604 return byte;
605 }
606
607 int
suword(user_addr_t addr,long word)608 suword(
609 user_addr_t addr,
610 long word)
611 {
612 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
613 }
614
615 long
fuword(user_addr_t addr)616 fuword(user_addr_t addr)
617 {
618 long word = 0;
619
620 if (copyin(addr, (void *) &word, sizeof(int))) {
621 return -1;
622 }
623 return word;
624 }
625
626 /* suiword and fuiword are the same as suword and fuword, respectively */
627
628 int
suiword(user_addr_t addr,long word)629 suiword(
630 user_addr_t addr,
631 long word)
632 {
633 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
634 }
635
636 long
fuiword(user_addr_t addr)637 fuiword(user_addr_t addr)
638 {
639 long word = 0;
640
641 if (copyin(addr, (void *) &word, sizeof(int))) {
642 return -1;
643 }
644 return word;
645 }
646
647 /*
648 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
649 * fetching and setting of process-sized size_t and pointer values.
650 */
651 int
sulong(user_addr_t addr,int64_t word)652 sulong(user_addr_t addr, int64_t word)
653 {
654 if (IS_64BIT_PROCESS(current_proc())) {
655 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
656 } else {
657 return suiword(addr, (long)word);
658 }
659 }
660
661 int64_t
fulong(user_addr_t addr)662 fulong(user_addr_t addr)
663 {
664 int64_t longword;
665
666 if (IS_64BIT_PROCESS(current_proc())) {
667 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
668 return -1;
669 }
670 return longword;
671 } else {
672 return (int64_t)fuiword(addr);
673 }
674 }
675
676 int
suulong(user_addr_t addr,uint64_t uword)677 suulong(user_addr_t addr, uint64_t uword)
678 {
679 if (IS_64BIT_PROCESS(current_proc())) {
680 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
681 } else {
682 return suiword(addr, (uint32_t)uword);
683 }
684 }
685
686 uint64_t
fuulong(user_addr_t addr)687 fuulong(user_addr_t addr)
688 {
689 uint64_t ulongword;
690
691 if (IS_64BIT_PROCESS(current_proc())) {
692 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
693 return -1ULL;
694 }
695 return ulongword;
696 } else {
697 return (uint64_t)fuiword(addr);
698 }
699 }
700
/*
 * swapon system call: not supported by this kernel; always fails
 * with ENOTSUP.
 */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}
706
707 /*
708 * pid_for_task
709 *
710 * Find the BSD process ID for the Mach task associated with the given Mach port
711 * name
712 *
713 * Parameters: args User argument descriptor (see below)
714 *
715 * Indirect parameters: args->t Mach port name
716 * args->pid Process ID (returned value; see below)
717 *
718 * Returns: KERL_SUCCESS Success
719 * KERN_FAILURE Not success
720 *
721 * Implicit returns: args->pid Process ID
722 *
723 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t        t = args->t;
	user_addr_t             pid_addr = args->pid;
	proc_t p;
	task_t t1;
	int pid = -1;
	kern_return_t   err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* resolve the port name; takes a task reference on success */
	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (is_corpsetask(t1)) {
			/* corpses have no proc; use the task's recorded pid */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* pid is -1 on failure; copyout result is deliberately ignored */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
762
763 /*
764 *
765 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
766 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
767 *
768 */
769 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
770
771 /*
772 * Routine: task_for_pid_posix_check
773 * Purpose:
774 * Verify that the current process should be allowed to
775 * get the target process's task port. This is only
776 * permitted if:
777 * - The current process is root
778 * OR all of the following are true:
779 * - The target process's real, effective, and saved uids
780 * are the same as the current proc's euid,
781 * - The target process's group set is a subset of the
782 * calling process's group set, and
783 * - The target process hasn't switched credentials.
784 *
785 * Returns: TRUE: permitted
786 * FALSE: denied
787 */
788 static int
task_for_pid_posix_check(proc_t target)789 task_for_pid_posix_check(proc_t target)
790 {
791 kauth_cred_t targetcred, mycred;
792 uid_t myuid;
793 int allowed;
794
795 /* No task_for_pid on bad targets */
796 if (target->p_stat == SZOMB) {
797 return FALSE;
798 }
799
800 mycred = kauth_cred_get();
801 myuid = kauth_cred_getuid(mycred);
802
803 /* If we're running as root, the check passes */
804 if (kauth_cred_issuser(mycred)) {
805 return TRUE;
806 }
807
808 /* We're allowed to get our own task port */
809 if (target == current_proc()) {
810 return TRUE;
811 }
812
813 /*
814 * Under DENY, only root can get another proc's task port,
815 * so no more checks are needed.
816 */
817 if (tfp_policy == KERN_TFP_POLICY_DENY) {
818 return FALSE;
819 }
820
821 targetcred = kauth_cred_proc_ref(target);
822 allowed = TRUE;
823
824 /* Do target's ruid, euid, and saved uid match my euid? */
825 if ((kauth_cred_getuid(targetcred) != myuid) ||
826 (kauth_cred_getruid(targetcred) != myuid) ||
827 (kauth_cred_getsvuid(targetcred) != myuid)) {
828 allowed = FALSE;
829 goto out;
830 }
831
832 /* Are target's groups a subset of my groups? */
833 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
834 allowed == 0) {
835 allowed = FALSE;
836 goto out;
837 }
838
839 /* Has target switched credentials? */
840 if (target->p_flag & P_SUGID) {
841 allowed = FALSE;
842 goto out;
843 }
844
845 out:
846 kauth_cred_unref(&targetcred);
847 return allowed;
848 }
849
850 /*
851 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
852 *
853 * Description: Waits for the user space daemon to respond to the request
854 * we made. Function declared non inline to be visible in
855 * stackshots and spindumps as well as debugging.
856 */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	/* Blocking upcall to the task access server; thin wrapper kept
	 * noinline so the wait shows up in stackshots/spindumps. */
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
863
864 /*
865 * Routine: task_for_pid
866 * Purpose:
867 * Get the task port for another "process", named by its
868 * process ID on the same host as "target_task".
869 *
870 * Only permitted to privileged processes, or processes
871 * with the same user ID.
872 *
873 * Note: if pid == 0, an error is return no matter who is calling.
874 *
875 * XXX This should be a BSD system call, not a Mach trap!!!
876 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	void                    * sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* validate the caller-supplied task port; takes a task reference */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* capture a stable identity before dropping the proc ref below */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held accross upcalls.
	 */
	task = p->task;
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	/* MAC policy check for handing out a control-flavor task port */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task)) {
		/* task ref consumed by convert_task_to_port */
		task = TASK_NULL;
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* task ref consumed by convert_task_to_port */
	task = TASK_NULL;
	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* tret is MACH_PORT_NULL on failure; copyout result deliberately ignored */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	/* release whatever references this path still holds */
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1021
1022 /*
1023 * Routine: task_name_for_pid
1024 * Purpose:
1025 * Get the task name port for another "process", named by its
1026 * process ID on the same host as "target_task".
1027 *
1028 * Only permitted to privileged processes, or processes
1029 * with the same user ID.
1030 *
1031 * XXX This should be a BSD system call, not a Mach trap!!!
1032 */
1033
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;	/* user pointer that receives the port name */
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Translate the caller-supplied task port; takes a task ref on success. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		/* Report MACH_PORT_NULL through the out-pointer on failure. */
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		/* Hold the target's credential steady while comparing uids below. */
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Allowed when the target is not a zombie AND the caller is the
		 * target itself, is superuser, or matches the target's effective
		 * and real uids.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
			if (p->task != TASK_NULL) {
				struct proc_ident pident = proc_ident(p);

				task_t task = p->task;

				/* Take a task ref and drop the proc ref before the MACF call. */
				task_reference(p->task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				/* MAC policy gets the final say on handing out the name port. */
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* Consumes the task reference taken above. */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Failure path: drop the caller's task ref and report a null port name. */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1118
1119 /*
1120 * Routine: task_inspect_for_pid
1121 * Purpose:
1122 * Get the task inspect port for another "process", named by its
1123 * process ID on the same host as "target_task".
1124 */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;	/* user pointer that receives the port name */

	proc_t proc = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_inspect_t task_insp = TASK_INSPECT_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	int error = 0;
	void *sright = NULL;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	/* Translate the caller-supplied task port; takes a task ref on success. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Snapshot the identity now; the proc ref is dropped before upcalls. */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	/* Standard POSIX-style uid/gid permission check for tfp-family calls. */
	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	task_insp = proc->task;
	if (task_insp == TASK_INSPECT_NULL) {
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	/* MAC policy check for handing out an inspect-flavor port. */
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	/* Common exit: always copy out tret (MACH_PORT_NULL on failure). */
	task_deallocate(t1);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1240
1241 /*
1242 * Routine: task_read_for_pid
1243 * Purpose:
1244 * Get the task read port for another "process", named by its
1245 * process ID on the same host as "target_task".
1246 */
1247 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1248 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1249 {
1250 mach_port_name_t target_tport = args->target_tport;
1251 int pid = args->pid;
1252 user_addr_t task_addr = args->t;
1253
1254 proc_t proc = PROC_NULL;
1255 task_t t1 = TASK_NULL;
1256 task_read_t task_read = TASK_READ_NULL;
1257 mach_port_name_t tret = MACH_PORT_NULL;
1258 ipc_port_t tfpport = MACH_PORT_NULL;
1259 int error = 0;
1260 void *sright = NULL;
1261 boolean_t is_current_proc = FALSE;
1262 struct proc_ident pident = {0};
1263
1264 /* Disallow read port for kernel_task */
1265 if (pid == 0) {
1266 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1267 return EPERM;
1268 }
1269
1270 t1 = port_name_to_task(target_tport);
1271 if (t1 == TASK_NULL) {
1272 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1273 return EINVAL;
1274 }
1275
1276 proc = proc_find(pid);
1277 if (proc == PROC_NULL) {
1278 error = ESRCH;
1279 goto trfpout;
1280 }
1281 pident = proc_ident(proc);
1282 is_current_proc = (proc == current_proc());
1283
1284 if (!(task_for_pid_posix_check(proc))) {
1285 error = EPERM;
1286 goto trfpout;
1287 }
1288
1289 task_read = proc->task;
1290 if (task_read == TASK_INSPECT_NULL) {
1291 goto trfpout;
1292 }
1293
1294 /*
1295 * Grab a task reference and drop the proc reference before making any upcalls.
1296 */
1297 task_reference(task_read);
1298
1299 proc_rele(proc);
1300 proc = PROC_NULL;
1301
1302 #if CONFIG_MACF
1303 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1304 if (error) {
1305 error = EPERM;
1306 goto trfpout;
1307 }
1308 #endif
1309
1310 /* If we aren't root and target's task access port is set... */
1311 if (!kauth_cred_issuser(kauth_cred_get()) &&
1312 !is_current_proc &&
1313 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1314 (tfpport != IPC_PORT_NULL)) {
1315 if (tfpport == IPC_PORT_DEAD) {
1316 error = EACCES;
1317 goto trfpout;
1318 }
1319
1320
1321 /* Call up to the task access server */
1322 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1323 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1324
1325 if (error != MACH_MSG_SUCCESS) {
1326 if (error == MACH_RCV_INTERRUPTED) {
1327 error = EINTR;
1328 } else {
1329 error = EPERM;
1330 }
1331 goto trfpout;
1332 }
1333 }
1334
1335 /* Check if the task has been corpsified */
1336 if (is_corpsetask(task_read)) {
1337 error = EACCES;
1338 goto trfpout;
1339 }
1340
1341 /* could be IP_NULL, consumes a ref */
1342 sright = (void*) convert_task_read_to_port(task_read);
1343 task_read = TASK_READ_NULL;
1344 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1345
1346 trfpout:
1347 task_deallocate(t1);
1348 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1349 if (proc != PROC_NULL) {
1350 proc_rele(proc);
1351 }
1352 if (tfpport != IPC_PORT_NULL) {
1353 ipc_port_release_send(tfpport);
1354 }
1355 if (task_read != TASK_READ_NULL) {
1356 task_deallocate(task_read);
1357 }
1358
1359 *ret = error;
1360 return error;
1361 }
1362
/*
 * Suspend the task of the process named by args->pid.
 * Requires tfp POSIX permission or PROCESS_RESUME_SUSPEND_ENTITLEMENT,
 * plus MACF approval; on macOS the task access server is also consulted.
 * Returns a BSD errno through *ret and as the return value.
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never allow suspending pid 0 (kernel). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Either the tfp POSIX check or the suspend/resume entitlement suffices. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = targetproc->task;
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	/* Hold a task ref across the suspend; map KERN_* to errno values. */
	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Let memorystatus account for the newly-suspended process. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1454
/*
 * Return the debug control port of the task named by args->pid,
 * copying the resulting port name out to args->t. The MACF and
 * task-access-server checks are skipped for callers holding
 * DEBUG_PORT_ENTITLEMENT. Returns KERN_* status.
 */
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;	/* user pointer that receives the port name */
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_t task = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	ipc_port_t sright = NULL;
	int error = 0;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Translate the caller-supplied task port; takes a task ref on success. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Snapshot the identity now; the proc ref is dropped before upcalls. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* Standard POSIX-style uid/gid permission check for tfp-family calls. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = p->task;
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	/* Entitled callers bypass both the MACF check and the access server. */
	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: always copy out tret (MACH_PORT_NULL on failure). */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1589
/*
 * Resume the task of the process named by args->pid (inverse of
 * pid_suspend). Same permission model: tfp POSIX check or
 * PROCESS_RESUME_SUSPEND_ENTITLEMENT, MACF approval, and on macOS the
 * task access server. Returns a BSD errno through *ret and as the
 * return value.
 */
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* Never allow resuming pid 0 (kernel). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Either the tfp POSIX check or the suspend/resume entitlement suffices. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = targetproc->task;
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* Undo any socket defunct-ing done while the process was suspended. */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	/* Map KERN_* results to errno; KERN_MEMORY_ERROR kills the target. */
	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1693
1694 #if !XNU_TARGET_OS_OSX
1695 /*
1696 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1697 * When a process is specified, this call is blocking, otherwise we wake up the
1698 * freezer thread and do not block on a process being frozen.
1699 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;

#ifndef CONFIG_FREEZE
	/* Freezer not built in: the call is a no-op that reports success. */
	#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	/* Negative pids are sentinels handled below, not process lookups. */
	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	//Note that targetproc may be null
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * pid == -2: compress anonymous pages; pid == -1: wake the freezer
	 * thread (non-blocking); pid >= 0: freeze that process synchronously.
	 */
	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1756 #endif /* !XNU_TARGET_OS_OSX */
1757
1758 #if SOCKETS
/*
 * Notify networking-related file descriptors (NECP fds, Skywalk
 * channels) of a memorystatus change for process p. Always returns 1.
 */
int
networking_memstatus_callout(proc_t p, uint32_t status)
{
	struct fileproc *fp;

	/*
	 * proc list lock NOT held
	 * proc lock NOT held
	 * a reference on the proc has been held / shall be dropped by the caller.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/* Walk the fd table under the fd lock, dispatching by descriptor type. */
	proc_fdlock(p);

	fdt_foreach(fp, p) {
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
#if NECP
		case DTYPE_NETPOLICY:
			necp_fd_memstatus(p, status,
			    (struct necp_fd_data *)fp_get_data(fp));
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			kern_channel_memstatus(p, status,
			    (struct kern_channel *)fp_get_data(fp));
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}
	proc_fdunlock(p);

	return 1;
}
1796
1797 #if SKYWALK
1798 /*
1799 * Since we make multiple passes across the fileproc array, record the
1800 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases. If we find more,
1802 * we'll go to the slow path during second pass.
1803 */
1804 #define MAX_CHANNELS 8 /* should be more than enough */
1805 #endif /* SKYWALK */
1806
/*
 * proc_iterate() callback for pid_shutdown_sockets(): defunct the
 * sockets, NECP fds and (second pass) Skywalk channels that belong to
 * the pid in args. Always returns PROC_RETURNED.
 */
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;	/* SHUTDOWN_SOCKET_LEVEL_DISCONNECT_* */
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	/* Fast-path cache of up to MAX_CHANNELS channel pointers (see above). */
	struct kern_channel *channel_array[MAX_CHANNELS];

	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match owner pid, last-user pid, or delegated effective pid. */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP). Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* Slow path: more channels than the cache holds; rescan the table. */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1891
/*
 * Defunct/disconnect networking state associated with args->pid at the
 * requested level, by iterating all processes with
 * networking_defunct_callout(). Returns a BSD errno through *ret and as
 * the return value.
 */
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int level = args->level;

	/* Only the two disconnect levels are accepted. */
	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Either the tfp POSIX check or the suspend/resume entitlement suffices. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * Walk every process: sockets used by the target pid may be held
	 * open by other processes (e.g. delegated sockets).
	 */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
	    networking_defunct_callout, args, NULL, NULL);

out:
	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1936
1937 #endif /* SOCKETS */
1938
1939 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1940 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1941 __unused int arg2, struct sysctl_req *req)
1942 {
1943 int error = 0;
1944 int new_value;
1945
1946 error = SYSCTL_OUT(req, arg1, sizeof(int));
1947 if (error || req->newptr == USER_ADDR_NULL) {
1948 return error;
1949 }
1950
1951 if (!kauth_cred_issuser(kauth_cred_get())) {
1952 return EPERM;
1953 }
1954
1955 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1956 goto out;
1957 }
1958 if ((new_value == KERN_TFP_POLICY_DENY)
1959 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
1960 tfp_policy = new_value;
1961 } else {
1962 error = EINVAL;
1963 }
1964 out:
1965 return error;
1966 }
1967
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

/* kern.secure_kernel — read-only: whether this is a SECURE_KERNEL build. */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy — task_for_pid policy, gated by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_* — shared-region tracing and behavior knobs. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
1986
1987 /*
1988 * shared_region_check_np:
1989 *
1990 * This system call is intended for dyld.
1991 *
1992 * dyld calls this when any process starts to see if the process's shared
1993 * region is already set up and ready to use.
1994 * This call returns the base address of the first mapping in the
1995 * process's shared region's first mapping.
1996 * dyld will then check what's mapped at that address.
1997 *
1998 * If the shared region is empty, dyld will then attempt to map the shared
1999 * cache file in the shared region via the shared_region_map_np() system call.
2000 *
2001 * If something's already mapped in the shared region, dyld will check if it
2002 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2004 * shared region.
2005 * If it doesn't match, dyld will unmap the shared region and map the shared
2006 * cache into the process's address space via mmap().
2007 *
2008 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2009 * the shared region. We will remove the shared_region reference from the task.
2010 *
2011 * ERROR VALUES
2012 * EINVAL no shared region
2013 * ENOMEM shared region is empty
2014 * EFAULT bad address for "start_address"
2015 */
int
shared_region_check_np(
	__unused struct proc *p,
	struct shared_region_check_np_args *uap,
	__unused int *retvalp)
{
	vm_shared_region_t shared_region;
	mach_vm_offset_t start_address = 0;
	int error = 0;
	kern_return_t kr;
	task_t task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/* retrieve the current tasks's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				/* shared region has no mappings yet */
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
				}
				if (error != 0) {
					/* copyout failure -> EFAULT back to dyld */
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)] "
						"check_np(0x%llx) "
						"copyout(0x%llx) error %d\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						(uint64_t)uap->start_address, (uint64_t)start_address,
						error));
				}
			}
		}
		/* drop the ref taken by vm_shared_region_get() */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2091
2092
2093 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2094 shared_region_copyin(
2095 struct proc *p,
2096 user_addr_t user_addr,
2097 unsigned int count,
2098 unsigned int element_size,
2099 void *kernel_data)
2100 {
2101 int error = 0;
2102 vm_size_t size = count * element_size;
2103
2104 error = copyin(user_addr, kernel_data, size);
2105 if (error) {
2106 SHARED_REGION_TRACE_ERROR(
2107 ("shared_region: %p [%d(%s)] map(): "
2108 "copyin(0x%llx, %ld) failed (error=%d)\n",
2109 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2110 proc_getpid(p), p->p_comm,
2111 (uint64_t)user_addr, (long)size, error));
2112 }
2113 return error;
2114 }
2115
2116 /*
2117 * A reasonable upper limit to prevent overflow of allocation/copyin.
2118 */
2119 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2120
2121 /* forward declaration */
2122 __attribute__((noinline))
2123 static void shared_region_map_and_slide_cleanup(
2124 struct proc *p,
2125 uint32_t files_count,
2126 struct _sr_file_mappings *sr_file_mappings,
2127 struct vm_shared_region *shared_region,
2128 struct vnode *scdir_vp);
2129
2130 /*
2131 * Setup part of _shared_region_map_and_slide().
2132 * It had to be broken out of _shared_region_map_and_slide() to
2133 * prevent compiler inlining from blowing out the stack.
2134 */
2135 __attribute__((noinline))
2136 static int
shared_region_map_and_slide_setup(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings,struct _sr_file_mappings ** sr_file_mappings,struct vm_shared_region ** shared_region_ptr,struct vnode ** scdir_vp,struct vnode * rdir_vp)2137 shared_region_map_and_slide_setup(
2138 struct proc *p,
2139 uint32_t files_count,
2140 struct shared_file_np *files,
2141 uint32_t mappings_count,
2142 struct shared_file_mapping_slide_np *mappings,
2143 struct _sr_file_mappings **sr_file_mappings,
2144 struct vm_shared_region **shared_region_ptr,
2145 struct vnode **scdir_vp,
2146 struct vnode *rdir_vp)
2147 {
2148 int error = 0;
2149 struct _sr_file_mappings *srfmp;
2150 uint32_t mappings_next;
2151 struct vnode_attr va;
2152 off_t fs;
2153 #if CONFIG_MACF
2154 vm_prot_t maxprot = VM_PROT_ALL;
2155 #endif
2156 uint32_t i;
2157 struct vm_shared_region *shared_region = NULL;
2158 boolean_t is_driverkit = task_is_driver(current_task());
2159 const char *expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2160
2161 SHARED_REGION_TRACE_DEBUG(
2162 ("shared_region: %p [%d(%s)] -> map\n",
2163 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2164 proc_getpid(p), p->p_comm));
2165
2166 if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2167 error = E2BIG;
2168 goto done;
2169 }
2170 if (files_count == 0) {
2171 error = EINVAL;
2172 goto done;
2173 }
2174 *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2175 Z_WAITOK | Z_ZERO);
2176 if (*sr_file_mappings == NULL) {
2177 error = ENOMEM;
2178 goto done;
2179 }
2180 mappings_next = 0;
2181 for (i = 0; i < files_count; i++) {
2182 srfmp = &(*sr_file_mappings)[i];
2183 srfmp->fd = files[i].sf_fd;
2184 srfmp->mappings_count = files[i].sf_mappings_count;
2185 srfmp->mappings = &mappings[mappings_next];
2186 mappings_next += srfmp->mappings_count;
2187 if (mappings_next > mappings_count) {
2188 error = EINVAL;
2189 goto done;
2190 }
2191 srfmp->slide = files[i].sf_slide;
2192 }
2193
2194 if (scdir_enforce) {
2195 /* get vnode for expected_scdir_path */
2196 error = vnode_lookup(expected_scdir_path, 0, scdir_vp, vfs_context_current());
2197 if (error) {
2198 SHARED_REGION_TRACE_ERROR(
2199 ("shared_region: %p [%d(%s)]: "
2200 "vnode_lookup(%s) failed (error=%d)\n",
2201 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2202 proc_getpid(p), p->p_comm,
2203 expected_scdir_path, error));
2204 goto done;
2205 }
2206 }
2207
2208 /* get the process's shared region (setup in vm_map_exec()) */
2209 shared_region = vm_shared_region_trim_and_get(current_task());
2210 *shared_region_ptr = shared_region;
2211 if (shared_region == NULL) {
2212 SHARED_REGION_TRACE_ERROR(
2213 ("shared_region: %p [%d(%s)] map(): "
2214 "no shared region\n",
2215 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2216 proc_getpid(p), p->p_comm));
2217 error = EINVAL;
2218 goto done;
2219 }
2220
2221 /*
2222 * Check the shared region matches the current root
2223 * directory of this process. Deny the mapping to
2224 * avoid tainting the shared region with something that
2225 * doesn't quite belong into it.
2226 */
2227 struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2228 if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2229 SHARED_REGION_TRACE_ERROR(
2230 ("shared_region: map(%p) root_dir mismatch\n",
2231 (void *)VM_KERNEL_ADDRPERM(current_thread())));
2232 error = EPERM;
2233 goto done;
2234 }
2235
2236
2237 for (srfmp = &(*sr_file_mappings)[0];
2238 srfmp < &(*sr_file_mappings)[files_count];
2239 srfmp++) {
2240 if (srfmp->mappings_count == 0) {
2241 /* no mappings here... */
2242 continue;
2243 }
2244
2245 /* get file structure from file descriptor */
2246 error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2247 if (error) {
2248 SHARED_REGION_TRACE_ERROR(
2249 ("shared_region: %p [%d(%s)] map: "
2250 "fd=%d lookup failed (error=%d)\n",
2251 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2252 proc_getpid(p), p->p_comm, srfmp->fd, error));
2253 goto done;
2254 }
2255
2256 /* we need at least read permission on the file */
2257 if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2258 SHARED_REGION_TRACE_ERROR(
2259 ("shared_region: %p [%d(%s)] map: "
2260 "fd=%d not readable\n",
2261 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2262 proc_getpid(p), p->p_comm, srfmp->fd));
2263 error = EPERM;
2264 goto done;
2265 }
2266
2267 /* get vnode from file structure */
2268 error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2269 if (error) {
2270 SHARED_REGION_TRACE_ERROR(
2271 ("shared_region: %p [%d(%s)] map: "
2272 "fd=%d getwithref failed (error=%d)\n",
2273 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2274 proc_getpid(p), p->p_comm, srfmp->fd, error));
2275 goto done;
2276 }
2277 srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2278
2279 /* make sure the vnode is a regular file */
2280 if (srfmp->vp->v_type != VREG) {
2281 SHARED_REGION_TRACE_ERROR(
2282 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2283 "not a file (type=%d)\n",
2284 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2285 proc_getpid(p), p->p_comm,
2286 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2287 srfmp->vp->v_name, srfmp->vp->v_type));
2288 error = EINVAL;
2289 goto done;
2290 }
2291
2292 #if CONFIG_MACF
2293 /* pass in 0 for the offset argument because AMFI does not need the offset
2294 * of the shared cache */
2295 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2296 srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
2297 if (error) {
2298 goto done;
2299 }
2300 #endif /* MAC */
2301
2302 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2303 /*
2304 * Check if the shared cache is in the trust cache;
2305 * if so, we can skip the root ownership check.
2306 */
2307 #if DEVELOPMENT || DEBUG
2308 /*
2309 * Skip both root ownership and trust cache check if
2310 * enforcement is disabled.
2311 */
2312 if (!cs_system_enforcement()) {
2313 goto after_root_check;
2314 }
2315 #endif /* DEVELOPMENT || DEBUG */
2316 struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2317 if (blob == NULL) {
2318 SHARED_REGION_TRACE_ERROR(
2319 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2320 "missing CS blob\n",
2321 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2322 proc_getpid(p), p->p_comm,
2323 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2324 srfmp->vp->v_name));
2325 goto root_check;
2326 }
2327 const uint8_t *cdhash = csblob_get_cdhash(blob);
2328 if (cdhash == NULL) {
2329 SHARED_REGION_TRACE_ERROR(
2330 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2331 "missing cdhash\n",
2332 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2333 proc_getpid(p), p->p_comm,
2334 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2335 srfmp->vp->v_name));
2336 goto root_check;
2337 }
2338 uint32_t result = pmap_lookup_in_static_trust_cache(cdhash);
2339 boolean_t in_trust_cache = result & (TC_LOOKUP_FOUND << TC_LOOKUP_RESULT_SHIFT);
2340 if (!in_trust_cache) {
2341 SHARED_REGION_TRACE_ERROR(
2342 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2343 "not in trust cache\n",
2344 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2345 proc_getpid(p), p->p_comm,
2346 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2347 srfmp->vp->v_name));
2348 goto root_check;
2349 }
2350 goto after_root_check;
2351 root_check:
2352 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2353
2354 /* The shared cache file must be owned by root */
2355 VATTR_INIT(&va);
2356 VATTR_WANTED(&va, va_uid);
2357 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2358 if (error) {
2359 SHARED_REGION_TRACE_ERROR(
2360 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2361 "vnode_getattr(%p) failed (error=%d)\n",
2362 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2363 proc_getpid(p), p->p_comm,
2364 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2365 srfmp->vp->v_name,
2366 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2367 error));
2368 goto done;
2369 }
2370 if (va.va_uid != 0) {
2371 SHARED_REGION_TRACE_ERROR(
2372 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2373 "owned by uid=%d instead of 0\n",
2374 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2375 proc_getpid(p), p->p_comm,
2376 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2377 srfmp->vp->v_name, va.va_uid));
2378 error = EPERM;
2379 goto done;
2380 }
2381
2382 #if XNU_TARGET_OS_OSX && defined(__arm64__)
2383 after_root_check:
2384 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2385
2386 #if CONFIG_CSR
2387 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2388 VATTR_INIT(&va);
2389 VATTR_WANTED(&va, va_flags);
2390 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2391 if (error) {
2392 SHARED_REGION_TRACE_ERROR(
2393 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2394 "vnode_getattr(%p) failed (error=%d)\n",
2395 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2396 proc_getpid(p), p->p_comm,
2397 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2398 srfmp->vp->v_name,
2399 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2400 error));
2401 goto done;
2402 }
2403
2404 if (!(va.va_flags & SF_RESTRICTED)) {
2405 /*
2406 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2407 * the shared cache file is NOT SIP-protected, so reject the
2408 * mapping request
2409 */
2410 SHARED_REGION_TRACE_ERROR(
2411 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
2412 "vnode is not SIP-protected. \n",
2413 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2414 proc_getpid(p), p->p_comm,
2415 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2416 srfmp->vp->v_name));
2417 error = EPERM;
2418 goto done;
2419 }
2420 }
2421 #else /* CONFIG_CSR */
2422 /* Devices without SIP/ROSP need to make sure that the shared cache is on the root volume. */
2423
2424 assert(rdir_vp != NULL);
2425 if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2426 SHARED_REGION_TRACE_ERROR(
2427 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2428 "not on process's root volume\n",
2429 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2430 proc_getpid(p), p->p_comm,
2431 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2432 srfmp->vp->v_name));
2433 error = EPERM;
2434 goto done;
2435 }
2436 #endif /* CONFIG_CSR */
2437
2438 if (scdir_enforce) {
2439 /* ensure parent is scdir_vp */
2440 assert(*scdir_vp != NULL);
2441 if (vnode_parent(srfmp->vp) != *scdir_vp) {
2442 SHARED_REGION_TRACE_ERROR(
2443 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2444 "shared cache file not in %s\n",
2445 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2446 proc_getpid(p), p->p_comm,
2447 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2448 srfmp->vp->v_name, expected_scdir_path));
2449 error = EPERM;
2450 goto done;
2451 }
2452 }
2453
2454 /* get vnode size */
2455 error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2456 if (error) {
2457 SHARED_REGION_TRACE_ERROR(
2458 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2459 "vnode_size(%p) failed (error=%d)\n",
2460 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2461 proc_getpid(p), p->p_comm,
2462 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2463 srfmp->vp->v_name,
2464 (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2465 goto done;
2466 }
2467 srfmp->file_size = fs;
2468
2469 /* get the file's memory object handle */
2470 srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2471 if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2472 SHARED_REGION_TRACE_ERROR(
2473 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2474 "no memory object\n",
2475 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2476 proc_getpid(p), p->p_comm,
2477 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2478 srfmp->vp->v_name));
2479 error = EINVAL;
2480 goto done;
2481 }
2482
2483 /* check that the mappings are properly covered by code signatures */
2484 if (!cs_system_enforcement()) {
2485 /* code signing is not enforced: no need to check */
2486 } else {
2487 for (i = 0; i < srfmp->mappings_count; i++) {
2488 if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2489 /* zero-filled mapping: not backed by the file */
2490 continue;
2491 }
2492 if (ubc_cs_is_range_codesigned(srfmp->vp,
2493 srfmp->mappings[i].sms_file_offset,
2494 srfmp->mappings[i].sms_size)) {
2495 /* this mapping is fully covered by code signatures */
2496 continue;
2497 }
2498 SHARED_REGION_TRACE_ERROR(
2499 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2500 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2501 "is not code-signed\n",
2502 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2503 proc_getpid(p), p->p_comm,
2504 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2505 srfmp->vp->v_name,
2506 i, srfmp->mappings_count,
2507 srfmp->mappings[i].sms_address,
2508 srfmp->mappings[i].sms_size,
2509 srfmp->mappings[i].sms_file_offset,
2510 srfmp->mappings[i].sms_max_prot,
2511 srfmp->mappings[i].sms_init_prot));
2512 error = EINVAL;
2513 goto done;
2514 }
2515 }
2516 }
2517 done:
2518 if (error != 0) {
2519 shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region, *scdir_vp);
2520 *sr_file_mappings = NULL;
2521 *shared_region_ptr = NULL;
2522 *scdir_vp = NULL;
2523 }
2524 return error;
2525 }
2526
2527 /*
2528 * shared_region_map_np()
2529 *
2530 * This system call is intended for dyld.
2531 *
2532 * dyld uses this to map a shared cache file into a shared region.
2533 * This is usually done only the first time a shared cache is needed.
2534 * Subsequent processes will just use the populated shared region without
2535 * requiring any further setup.
2536 */
2537 static int
_shared_region_map_and_slide(struct proc * p,uint32_t files_count,struct shared_file_np * files,uint32_t mappings_count,struct shared_file_mapping_slide_np * mappings)2538 _shared_region_map_and_slide(
2539 struct proc *p,
2540 uint32_t files_count,
2541 struct shared_file_np *files,
2542 uint32_t mappings_count,
2543 struct shared_file_mapping_slide_np *mappings)
2544 {
2545 int error = 0;
2546 kern_return_t kr = KERN_SUCCESS;
2547 struct _sr_file_mappings *sr_file_mappings = NULL;
2548 struct vnode *scdir_vp = NULL;
2549 struct vnode *rdir_vp = NULL;
2550 struct vm_shared_region *shared_region = NULL;
2551
2552 /*
2553 * Get a reference to the current proc's root dir.
2554 * Need this to prevent racing with chroot.
2555 */
2556 proc_fdlock(p);
2557 rdir_vp = p->p_fd.fd_rdir;
2558 if (rdir_vp == NULL) {
2559 rdir_vp = rootvnode;
2560 }
2561 assert(rdir_vp != NULL);
2562 vnode_get(rdir_vp);
2563 proc_fdunlock(p);
2564
2565 /*
2566 * Turn files, mappings into sr_file_mappings and other setup.
2567 */
2568 error = shared_region_map_and_slide_setup(p, files_count,
2569 files, mappings_count, mappings,
2570 &sr_file_mappings, &shared_region, &scdir_vp, rdir_vp);
2571 if (error != 0) {
2572 vnode_put(rdir_vp);
2573 return error;
2574 }
2575
2576 /* map the file(s) into that shared region's submap */
2577 kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2578 if (kr != KERN_SUCCESS) {
2579 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2580 "vm_shared_region_map_file() failed kr=0x%x\n",
2581 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2582 proc_getpid(p), p->p_comm, kr));
2583 }
2584
2585 /* convert kern_return_t to errno */
2586 switch (kr) {
2587 case KERN_SUCCESS:
2588 error = 0;
2589 break;
2590 case KERN_INVALID_ADDRESS:
2591 error = EFAULT;
2592 break;
2593 case KERN_PROTECTION_FAILURE:
2594 error = EPERM;
2595 break;
2596 case KERN_NO_SPACE:
2597 error = ENOMEM;
2598 break;
2599 case KERN_FAILURE:
2600 case KERN_INVALID_ARGUMENT:
2601 default:
2602 error = EINVAL;
2603 break;
2604 }
2605
2606 /*
2607 * Mark that this process is now using split libraries.
2608 */
2609 if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2610 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2611 }
2612
2613 vnode_put(rdir_vp);
2614 shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region, scdir_vp);
2615
2616 SHARED_REGION_TRACE_DEBUG(
2617 ("shared_region: %p [%d(%s)] <- map\n",
2618 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2619 proc_getpid(p), p->p_comm));
2620
2621 return error;
2622 }
2623
2624 /*
2625 * Clean up part of _shared_region_map_and_slide()
2626 * It had to be broken out of _shared_region_map_and_slide() to
2627 * prevent compiler inlining from blowing out the stack.
2628 */
2629 __attribute__((noinline))
2630 static void
shared_region_map_and_slide_cleanup(struct proc * p,uint32_t files_count,struct _sr_file_mappings * sr_file_mappings,struct vm_shared_region * shared_region,struct vnode * scdir_vp)2631 shared_region_map_and_slide_cleanup(
2632 struct proc *p,
2633 uint32_t files_count,
2634 struct _sr_file_mappings *sr_file_mappings,
2635 struct vm_shared_region *shared_region,
2636 struct vnode *scdir_vp)
2637 {
2638 struct _sr_file_mappings *srfmp;
2639 struct vnode_attr va;
2640
2641 if (sr_file_mappings != NULL) {
2642 for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2643 if (srfmp->vp != NULL) {
2644 vnode_lock_spin(srfmp->vp);
2645 srfmp->vp->v_flag |= VSHARED_DYLD;
2646 vnode_unlock(srfmp->vp);
2647
2648 /* update the vnode's access time */
2649 if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2650 VATTR_INIT(&va);
2651 nanotime(&va.va_access_time);
2652 VATTR_SET_ACTIVE(&va, va_access_time);
2653 vnode_setattr(srfmp->vp, &va, vfs_context_current());
2654 }
2655
2656 #if NAMEDSTREAMS
2657 /*
2658 * If the shared cache is compressed, it may
2659 * have a namedstream vnode instantiated for
2660 * for it. That namedstream vnode will also
2661 * have to be marked with VSHARED_DYLD.
2662 */
2663 if (vnode_hasnamedstreams(srfmp->vp)) {
2664 vnode_t svp;
2665 if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2666 NS_OPEN, 0, vfs_context_kernel()) == 0) {
2667 vnode_lock_spin(svp);
2668 svp->v_flag |= VSHARED_DYLD;
2669 vnode_unlock(svp);
2670 vnode_put(svp);
2671 }
2672 }
2673 #endif /* NAMEDSTREAMS */
2674 /*
2675 * release the vnode...
2676 * ubc_map() still holds it for us in the non-error case
2677 */
2678 (void) vnode_put(srfmp->vp);
2679 srfmp->vp = NULL;
2680 }
2681 if (srfmp->fp != NULL) {
2682 /* release the file descriptor */
2683 fp_drop(p, srfmp->fd, srfmp->fp, 0);
2684 srfmp->fp = NULL;
2685 }
2686 }
2687 kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2688 }
2689
2690 if (scdir_vp != NULL) {
2691 (void)vnode_put(scdir_vp);
2692 scdir_vp = NULL;
2693 }
2694
2695 if (shared_region != NULL) {
2696 vm_shared_region_deallocate(shared_region);
2697 }
2698 }
2699
2700
2701 /*
2702 * For each file mapped, we may have mappings for:
2703 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2704 * so let's round up to 8 mappings per file.
2705 */
2706 #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
2707
2708 /*
2709 * This is the older interface that dyld uses to map in the shared
2710 * library. dyld is slowly moving to the new shared_region_map_and_slide_2_np()
2711 * call as needed.
2712 */
2713 int
shared_region_map_and_slide_np(struct proc * p,struct shared_region_map_and_slide_np_args * uap,__unused int * retvalp)2714 shared_region_map_and_slide_np(
2715 struct proc *p,
2716 struct shared_region_map_and_slide_np_args *uap,
2717 __unused int *retvalp)
2718 {
2719 unsigned int mappings_count = uap->count;
2720 unsigned int m;
2721 uint32_t slide = uap->slide;
2722 struct shared_file_np shared_files[1];
2723 struct shared_file_mapping_np legacy_mapping;
2724 struct shared_file_mapping_slide_np *mappings = NULL;
2725 kern_return_t kr = KERN_SUCCESS;
2726
2727 if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
2728 if (kr == KERN_INVALID_ARGUMENT) {
2729 /*
2730 * This will happen if we request sliding again
2731 * with the same slide value that was used earlier
2732 * for the very first sliding.
2733 */
2734 kr = KERN_SUCCESS;
2735 }
2736 goto done;
2737 }
2738
2739 if (mappings_count == 0) {
2740 SHARED_REGION_TRACE_INFO(
2741 ("shared_region: %p [%d(%s)] map(): "
2742 "no mappings\n",
2743 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2744 proc_getpid(p), p->p_comm));
2745 kr = 0; /* no mappings: we're done ! */
2746 goto done;
2747 } else if (mappings_count <= SFM_MAX) {
2748 mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
2749 if (mappings == NULL) {
2750 kr = KERN_RESOURCE_SHORTAGE;
2751 goto done;
2752 }
2753 } else {
2754 SHARED_REGION_TRACE_ERROR(
2755 ("shared_region: %p [%d(%s)] map(): "
2756 "too many mappings (%d) max %d\n",
2757 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2758 proc_getpid(p), p->p_comm,
2759 mappings_count, SFM_MAX));
2760 kr = KERN_FAILURE;
2761 goto done;
2762 }
2763
2764 /*
2765 * Read in the mappings and translate to new format.
2766 */
2767 for (m = 0; m < mappings_count; ++m) {
2768 user_addr_t from_uaddr = uap->mappings + (m * sizeof(struct shared_file_mapping_np));
2769 kr = shared_region_copyin(p, from_uaddr, 1, sizeof(legacy_mapping), &legacy_mapping);
2770 if (kr != 0) {
2771 goto done;
2772 }
2773 mappings[m].sms_address = legacy_mapping.sfm_address;
2774 mappings[m].sms_size = legacy_mapping.sfm_size;
2775 mappings[m].sms_file_offset = legacy_mapping.sfm_file_offset;
2776 mappings[m].sms_max_prot = legacy_mapping.sfm_max_prot;
2777 mappings[m].sms_init_prot = legacy_mapping.sfm_init_prot;
2778 mappings[m].sms_slide_size = uap->slide_size;
2779 mappings[m].sms_slide_start = uap->slide_start;
2780 }
2781
2782 bzero(shared_files, sizeof(shared_files));
2783 shared_files[0].sf_fd = uap->fd;
2784 shared_files[0].sf_mappings_count = mappings_count;
2785 shared_files[0].sf_slide = slide;
2786
2787 kr = _shared_region_map_and_slide(p,
2788 1, /* # of files to map */
2789 &shared_files[0], /* files to map */
2790 mappings_count,
2791 mappings);
2792
2793 done:
2794 kfree_data(mappings, mappings_count * sizeof(mappings[0]));
2795 return kr;
2796 }
2797
2798 /*
2799 * This is the new interface for setting up shared region mappings.
2800 *
2801 * The slide used for shared regions setup using this interface is done differently
2802 * from the old interface. The slide value passed in the shared_files_np represents
2803 * a max value. The kernel will choose a random value based on that, then use it
2804 * for all shared regions.
2805 */
2806 #if defined (__x86_64__)
2807 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2808 #else
2809 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2810 #endif
2811
int
shared_region_map_and_slide_2_np(
	struct proc *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int *retvalp)
{
	unsigned int files_count;
	struct shared_file_np *shared_files = NULL;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* validate the file count and allocate the kernel-side array */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	/* validate the mapping count and allocate the kernel-side array */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	/* copy in the file descriptors + per-file mapping counts */
	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/* copy in the flat array of mapping descriptors */
	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * sf_slide of the first file is treated as the max slide;
	 * the kernel picks a random page-aligned slide below it
	 * (see the interface comment above SLIDE_AMOUNT_MASK).
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;  /* running index into the flat mappings[] array */
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			/* the per-file counts must not exceed the flat array */
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* kfree_data() tolerates NULL pointers on the early-exit paths */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2935
/* sysctl overflow room */

/* vm.pagesize: the VM page size, exported read-only to userspace */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
2950
2951 static int
2952 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
2953 {
2954 #pragma unused(oidp, arg1, arg2)
2955 unsigned int page_free_wanted;
2956
2957 page_free_wanted = mach_vm_ctl_page_free_wanted();
2958 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
2959 }
2960 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
2961 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
2962 0, 0, vm_ctl_page_free_wanted, "I", "");
2963
2964 extern unsigned int vm_page_purgeable_count;
2965 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2966 &vm_page_purgeable_count, 0, "Purgeable page count");
2967
2968 extern unsigned int vm_page_purgeable_wired_count;
2969 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2970 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
2971
2972 extern unsigned int vm_page_kern_lpage_count;
2973 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2974 &vm_page_kern_lpage_count, 0, "kernel used large pages");
2975
2976 #if DEVELOPMENT || DEBUG
2977 #if __ARM_MIXED_PAGE_SIZE__
2978 static int vm_mixed_pagesize_supported = 1;
2979 #else
2980 static int vm_mixed_pagesize_supported = 0;
2981 #endif /*__ARM_MIXED_PAGE_SIZE__ */
2982 SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
2983 &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
2984
2985 SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
2986 SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
2987 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
2988 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
2989
2990 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
2991 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
2992 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
2993 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
2994 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
2995 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
2996
2997 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2998 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
2999 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3000 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
3001 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3002 &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
3003 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3004 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
3005 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3006 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
3007 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3008 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
3009 #endif /* DEVELOPMENT || DEBUG */
3010
3011 extern int madvise_free_debug;
3012 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
3013 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
3014
/*
 * Read-only exports of the vm_page_stats_reusable counters, which account
 * for "reusable"/"reuse" page operations (see madvise MADV_FREE_REUSABLE /
 * MADV_FREE_REUSE handling elsewhere in the VM layer).
 */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");
3047
3048
/* Global page-queue sizes, exported read-only. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
/*
 * NOTE(review): pageout_speculative_clean and pageout_freed_speculative both
 * export vm_pageout_freed_speculative — presumably a legacy alias kept for
 * compatibility; confirm before removing either.
 */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");


/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3076
#if defined (__x86_64__)
/* x86_64-only: free-page "clump" allocator tunables and statistics. */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
/* vm_clump_stats[k] counts allocations served from a clump of k pages (1..16). */
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */
3105
#if CONFIG_SECLUDED_MEMORY

/* Secluded-memory page pool: target, current sizes, and grab statistics. */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

/* Success/failure breakdown of attempts to grab a secluded page. */
extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */
3131
3132 #include <kern/thread.h>
3133 #include <sys/user.h>
3134
3135 void vm_pageout_io_throttle(void);
3136
3137 void
vm_pageout_io_throttle(void)3138 vm_pageout_io_throttle(void)
3139 {
3140 struct uthread *uthread = current_uthread();
3141
3142 /*
3143 * thread is marked as a low priority I/O type
3144 * and the I/O we issued while in this cleaning operation
3145 * collided with normal I/O operations... we'll
3146 * delay in order to mitigate the impact of this
3147 * task on the normal operation of the system
3148 */
3149
3150 if (uthread->uu_lowpri_window) {
3151 throttle_lowpri_io(1);
3152 }
3153 }
3154
3155 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3156 vm_pressure_monitor(
3157 __unused struct proc *p,
3158 struct vm_pressure_monitor_args *uap,
3159 int *retval)
3160 {
3161 kern_return_t kr;
3162 uint32_t pages_reclaimed;
3163 uint32_t pages_wanted;
3164
3165 kr = mach_vm_pressure_monitor(
3166 (boolean_t) uap->wait_for_pressure,
3167 uap->nsecs_monitored,
3168 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3169 &pages_wanted);
3170
3171 switch (kr) {
3172 case KERN_SUCCESS:
3173 break;
3174 case KERN_ABORTED:
3175 return EINTR;
3176 default:
3177 return EINVAL;
3178 }
3179
3180 if (uap->pages_reclaimed) {
3181 if (copyout((void *)&pages_reclaimed,
3182 uap->pages_reclaimed,
3183 sizeof(pages_reclaimed)) != 0) {
3184 return EFAULT;
3185 }
3186 }
3187
3188 *retval = (int) pages_wanted;
3189 return 0;
3190 }
3191
3192 int
kas_info(struct proc * p,struct kas_info_args * uap,int * retval __unused)3193 kas_info(struct proc *p,
3194 struct kas_info_args *uap,
3195 int *retval __unused)
3196 {
3197 #ifndef CONFIG_KAS_INFO
3198 (void)p;
3199 (void)uap;
3200 return ENOTSUP;
3201 #else /* CONFIG_KAS_INFO */
3202 int selector = uap->selector;
3203 user_addr_t valuep = uap->value;
3204 user_addr_t sizep = uap->size;
3205 user_size_t size, rsize;
3206 int error;
3207
3208 if (!kauth_cred_issuser(kauth_cred_get())) {
3209 return EPERM;
3210 }
3211
3212 #if CONFIG_MACF
3213 error = mac_system_check_kas_info(kauth_cred_get(), selector);
3214 if (error) {
3215 return error;
3216 }
3217 #endif
3218
3219 if (IS_64BIT_PROCESS(p)) {
3220 user64_size_t size64;
3221 error = copyin(sizep, &size64, sizeof(size64));
3222 size = (user_size_t)size64;
3223 } else {
3224 user32_size_t size32;
3225 error = copyin(sizep, &size32, sizeof(size32));
3226 size = (user_size_t)size32;
3227 }
3228 if (error) {
3229 return error;
3230 }
3231
3232 switch (selector) {
3233 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3234 {
3235 uint64_t slide = vm_kernel_slide;
3236
3237 if (sizeof(slide) != size) {
3238 return EINVAL;
3239 }
3240
3241 error = copyout(&slide, valuep, sizeof(slide));
3242 if (error) {
3243 return error;
3244 }
3245 rsize = size;
3246 }
3247 break;
3248 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3249 {
3250 uint32_t i;
3251 kernel_mach_header_t *mh = &_mh_execute_header;
3252 struct load_command *cmd;
3253 cmd = (struct load_command*) &mh[1];
3254 uint64_t *bases;
3255 rsize = mh->ncmds * sizeof(uint64_t);
3256
3257 /*
3258 * Return the size if no data was passed
3259 */
3260 if (valuep == 0) {
3261 break;
3262 }
3263
3264 if (rsize > size) {
3265 return EINVAL;
3266 }
3267
3268 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3269
3270 for (i = 0; i < mh->ncmds; i++) {
3271 if (cmd->cmd == LC_SEGMENT_KERNEL) {
3272 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3273 bases[i] = (uint64_t)sg->vmaddr;
3274 }
3275 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3276 }
3277
3278 error = copyout(bases, valuep, rsize);
3279
3280 kfree_data(bases, rsize);
3281
3282 if (error) {
3283 return error;
3284 }
3285 }
3286 break;
3287 default:
3288 return EINVAL;
3289 }
3290
3291 if (IS_64BIT_PROCESS(p)) {
3292 user64_size_t size64 = (user64_size_t)rsize;
3293 error = copyout(&size64, sizep, sizeof(size64));
3294 } else {
3295 user32_size_t size32 = (user32_size_t)rsize;
3296 error = copyout(&size32, sizep, sizeof(size32));
3297 }
3298
3299 return error;
3300 #endif /* CONFIG_KAS_INFO */
3301 }
3302
3303 #if __has_feature(ptrauth_calls)
3304 /*
3305 * Generate a random pointer signing key that isn't 0.
3306 */
/*
 * Generate a random pointer signing key that isn't 0.
 * Keeps drawing random bits until a non-zero value is obtained.
 */
uint64_t
generate_jop_key(void)
{
	uint64_t key = 0;

	while (key == 0) {
		read_random(&key, sizeof(key));
	}
	return key;
}
3317 #endif /* __has_feature(ptrauth_calls) */
3318
3319
3320 #pragma clang diagnostic push
3321 #pragma clang diagnostic ignored "-Wcast-qual"
3322 #pragma clang diagnostic ignored "-Wunused-function"
3323
3324 static void
asserts()3325 asserts()
3326 {
3327 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3328 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3329 }
3330
3331 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
3332 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
3333 #pragma clang diagnostic pop
3334
/* Total number of physical pages known to the VM system. */
extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

/* Counts of tainted pages detected in UPL/IOPL paths. */
extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3346
#if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
/* Tunable gate for the footprint_suspend sysctl handler below. */
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3352 static int
3353 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3354 {
3355 #pragma unused(oidp, arg1, arg2)
3356 int error = 0;
3357 int new_value;
3358
3359 if (req->newptr == USER_ADDR_NULL) {
3360 return 0;
3361 }
3362 error = SYSCTL_IN(req, &new_value, sizeof(int));
3363 if (error) {
3364 return error;
3365 }
3366 if (!vm_footprint_suspend_allowed) {
3367 if (new_value != 0) {
3368 /* suspends are not allowed... */
3369 return 0;
3370 }
3371 /* ... but let resumes proceed */
3372 }
3373 DTRACE_VM2(footprint_suspend,
3374 vm_map_t, current_map(),
3375 int, new_value);
3376
3377 pmap_footprint_suspend(current_map(), new_value);
3378
3379 return 0;
3380 }
/* Write-only, world-writable (gated by the handler), value never read back. */
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
3385
/* Corpse footprint collection statistics, exported read-only. */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");


/* Shared region pager activity counters. */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
/* Tunable delay (writable) applied before destroying a shared region. */
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
3423
3424
/* vm_map_lookup_locked() copy-strategy statistics, exported read-only. */
extern uint64_t vm_map_lookup_locked_copy_slowly_count;
extern uint64_t vm_map_lookup_locked_copy_slowly_size;
extern uint64_t vm_map_lookup_locked_copy_slowly_max;
extern uint64_t vm_map_lookup_locked_copy_slowly_restart;
extern uint64_t vm_map_lookup_locked_copy_slowly_error;
extern uint64_t vm_map_lookup_locked_copy_strategically_count;
extern uint64_t vm_map_lookup_locked_copy_strategically_size;
extern uint64_t vm_map_lookup_locked_copy_strategically_max;
extern uint64_t vm_map_lookup_locked_copy_strategically_restart;
extern uint64_t vm_map_lookup_locked_copy_strategically_error;
extern uint64_t vm_map_lookup_locked_copy_shadow_count;
extern uint64_t vm_map_lookup_locked_copy_shadow_size;
extern uint64_t vm_map_lookup_locked_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_max, "");
3464
/* Writable policy knob: protect privileged processes from untrusted mappings. */
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Current and peak number of shared regions. */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
/* Pointer-authentication (arm64e) shared region key/reslide counters. */
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* debug4k: writable debug knobs for 4K-page-size debugging builds. */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */
3508
/* Counters for map size/data limit settings and limit-triggered failures. */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/* Resilient-media fault path statistics. */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/* Error-injection knobs (rates writable, hit counters read-only). */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */
3544
3545 /*
3546 * A sysctl which causes all existing shared regions to become stale. They
3547 * will no longer be used by anything new and will be torn down as soon as
3548 * the last existing user exits. A write of non-zero value causes that to happen.
3549 * This should only be used by launchd, so we check that this is initproc.
3550 */
3551 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3552 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3553 {
3554 unsigned int value = 0;
3555 int changed = 0;
3556 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3557 if (error || !changed) {
3558 return error;
3559 }
3560 if (current_proc() != initproc) {
3561 return EPERM;
3562 }
3563
3564 vm_shared_region_pivot();
3565
3566 return 0;
3567 }
3568
/* Write-only trigger handled by shared_region_pivot() above. */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");
3575
3576 /*
3577 * sysctl to return the number of pages on retired_pages_object
3578 */
3579 static int
3580 retired_pages_count SYSCTL_HANDLER_ARGS
3581 {
3582 #pragma unused(arg1, arg2, oidp)
3583 extern uint32_t vm_retired_pages_count(void);
3584 uint32_t value = vm_retired_pages_count();
3585
3586 return SYSCTL_OUT(req, &value, sizeof(value));
3587 }
3588 SYSCTL_PROC(_vm, OID_AUTO, retired_pages_count, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3589 0, 0, &retired_pages_count, "I", "");
3590
3591 #if DEBUG || DEVELOPMENT
3592 /*
3593 * A sysctl that can be used to corrupt a text page with an illegal instruction.
3594 * Used for testing text page self healing.
3595 */
3596 extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3597 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3598 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3599 {
3600 uint64_t value = 0;
3601 int error = sysctl_handle_quad(oidp, &value, 0, req);
3602 if (error || !req->newptr) {
3603 return error;
3604 }
3605
3606 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3607 return 0;
3608 } else {
3609 return EINVAL;
3610 }
3611 }
3612
3613 SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
3614 CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3615 0, 0, corrupt_text_addr, "-", "");
3616 #endif /* DEBUG || DEVELOPMENT */
3617