1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/task.h>
43 #include <kern/thread.h>
44 #include <kern/debug.h>
45 #include <kern/extmod_statistics.h>
46 #include <mach/mach_traps.h>
47 #include <mach/port.h>
48 #include <mach/sdt.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56 #include <machine/machine_routines.h>
57
58 #include <sys/file_internal.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/dir.h>
62 #include <sys/namei.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/vm.h>
66 #include <sys/file.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/mount.h>
69 #include <sys/xattr.h>
70 #include <sys/trace.h>
71 #include <sys/kernel.h>
72 #include <sys/ubc_internal.h>
73 #include <sys/user.h>
74 #include <sys/syslog.h>
75 #include <sys/stat.h>
76 #include <sys/sysproto.h>
77 #include <sys/mman.h>
78 #include <sys/sysctl.h>
79 #include <sys/cprotect.h>
80 #include <sys/kpi_socket.h>
81 #include <sys/kas_info.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/random.h>
85 #if NECP
86 #include <net/necp.h>
87 #endif /* NECP */
88 #if SKYWALK
89 #include <skywalk/os_channel.h>
90 #endif /* SKYWALK */
91
92 #include <security/audit/audit.h>
93 #include <security/mac.h>
94 #include <bsm/audit_kevents.h>
95
96 #include <kern/kalloc.h>
97 #include <vm/vm_map.h>
98 #include <vm/vm_kern.h>
99 #include <vm/vm_pageout.h>
100
101 #include <mach/shared_region.h>
102 #include <vm/vm_shared_region.h>
103
104 #include <vm/vm_protos.h>
105
106 #include <sys/kern_memorystatus.h>
107 #include <sys/kern_memorystatus_freeze.h>
108 #include <sys/proc_internal.h>
109
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113
114 #include <kern/bits.h>
115
116 #if CONFIG_CSR
117 #include <sys/csr.h>
118 #endif /* CONFIG_CSR */
119 #include <IOKit/IOBSD.h>
120
#if VM_MAP_DEBUG_APPLE_PROTECT
/* Debug knob: enables extra tracing for "apple protect" (encrypted executable) mappings. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
/* Debug knob: enables extra tracing for the 4K ("fourk") pager in vm_map. */
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
#endif /* VM_MAP_DEBUG_FOURK */
128
129 #if DEVELOPMENT || DEBUG
130
131 static int
132 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
133 {
134 #pragma unused(arg1, arg2)
135 vm_offset_t kaddr;
136 kern_return_t kr;
137 int error = 0;
138 int size = 0;
139
140 error = sysctl_handle_int(oidp, &size, 0, req);
141 if (error || !req->newptr) {
142 return error;
143 }
144
145 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
146 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
147
148 if (kr == KERN_SUCCESS) {
149 kmem_free(kernel_map, kaddr, size);
150 }
151
152 return error;
153 }
154
/* Write-only test trigger for the handler above. */
SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");

/* Global toggle for footprint-style reporting in vm_region (defined in osfmk). */
extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");

#endif /* DEVELOPMENT || DEBUG */
162
163 static int
164 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
165 {
166 #pragma unused(arg1, arg2, oidp)
167 int error = 0;
168 int value;
169
170 value = task_self_region_footprint();
171 error = SYSCTL_OUT(req, &value, sizeof(int));
172 if (error) {
173 return error;
174 }
175
176 if (!req->newptr) {
177 return 0;
178 }
179
180 error = SYSCTL_IN(req, &value, sizeof(int));
181 if (error) {
182 return error;
183 }
184 task_self_region_footprint_set(value);
185 return 0;
186 }
187 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
188
/*
 * Per-thread knob: read or set the page size used when this thread queries
 * VM regions.  Only 0, 4096 and 16384 are accepted on write.
 */
static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	/* Report the thread's current region page size (derived from its shift). */
	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	/* Pure read: done. */
	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	/* Only 0 (presumably "reset to default" — TODO confirm), 4K, or 16K. */
	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	/* Without mixed-page-size support, only the map's native page size is valid. */
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	/*
	 * bit_first() turns the page size back into a shift.
	 * NOTE(review): for value == 0 this assumes bit_first(0)'s result is
	 * understood by thread_self_region_page_shift_set() as "default" — verify.
	 */
	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
225
226
#if DEVELOPMENT || DEBUG
/* When set, panic instead of just logging on unsigned-code execution attempts. */
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
#endif /* DEVELOPMENT || DEBUG */

/* Counters for operations performed on code-signed executable memory. */
extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

/* "apple protect" pager bookkeeping: live/mapped pager counts and cache cap. */
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");

#if DEVELOPMENT || DEBUG
/* Debug toggles named after the radar / feature they exercise. */
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm__ || __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int free_page_size_tt_count;
extern unsigned int free_two_page_size_tt_count;
extern unsigned int free_tt_count;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
#if DEVELOPMENT || DEBUG
/* pmap ASID (address space ID) cache statistics. */
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
#endif
#endif /* __arm__ || __arm64__ */

#if __arm64__
extern int fourk_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */

/* Read-only exports of the global vm_counters statistics structure. */
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;   /* Disabled by default */
extern int proc_shadow_max(void);
/*
 * Report the longest VM object shadow chain found across processes.
 * Returns 0 unless the (expensive) scan has been enabled via
 * vm.vm_shadow_max_enabled.
 */
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int depth;

	depth = vm_shadow_max_enabled ? proc_shadow_max() : 0;
	return SYSCTL_OUT(req, &depth, sizeof(depth));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
326
/* Master switch for VM debug event tracing. */
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/* Forward declaration; defined below. Non-inline so it shows up in stackshots. */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */
342
/*
 * Human-readable names indexed by (prot & VM_PROT_ALL), i.e. by the
 * read/write/execute permission bits.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
353
354 void
log_stack_execution_failure(addr64_t vaddr,vm_prot_t prot)355 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
356 {
357 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
358 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
359 }
360
/*
 * shared_region_unnest_logging: level of logging of unnesting events
 * 0 - no logging
 * 1 - throttled logging of unexpected unnesting events (default)
 * 2 - unthrottled logging of unexpected unnesting events
 * 3+ - unthrottled logging of all unnesting events
 */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

/* Throttle window (seconds) and max messages per window for level-1 logging. */
int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
375
/*
 * Shared cache path enforcement.
 */

#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
/* On Intel macOS, only allow the shared cache to be mapped from scdir_path. */
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

static char scdir_path[] = "/System/Library/dyld/";

#else /* XNU_TARGET_OS_OSX */

/* Embedded platforms: enforcement off; cache lives under /System/Library/Caches. */
static int scdir_enforce = 0;
static char scdir_path[] = "/System/Library/Caches/com.apple.dyld/";

#endif /* XNU_TARGET_OS_OSX */

/* DriverKit processes use a separate shared cache directory. */
static char driverkit_scdir_path[] = "/System/DriverKit/System/Library/dyld/";
398
#ifndef SECURE_KERNEL
/*
 * Write handler for vm.enforce_shared_cache_dir.  On CSR (SIP) systems the
 * setting may only be changed when filesystem restrictions are lifted.
 */
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	/* Refuse (and log) unless SIP allows unrestricted filesystem access. */
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}

SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif
413
/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

/*
 * Record (via DTrace and the system log) that map "m" triggered unnesting of
 * the shared-region range [s, e).  What gets logged depends on the
 * shared_region_unnest_logging level documented above.
 */
void
log_unnest_badness(
	vm_map_t m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval tv;

	/* Level 0: all logging disabled. */
	if (shared_region_unnest_logging == 0) {
		return;
	}

	/* Levels 1-2 only report *unexpected* unnesting. */
	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	/* Level 1: rate-limit to the configured count per time window. */
	if (shared_region_unnest_logging <= 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			/* New window: reset the counter. */
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}
464
465 int
useracc(user_addr_t addr,user_size_t len,int prot)466 useracc(
467 user_addr_t addr,
468 user_size_t len,
469 int prot)
470 {
471 vm_map_t map;
472
473 map = current_map();
474 return vm_map_check_protection(
475 map,
476 vm_map_trunc_page(addr,
477 vm_map_page_mask(map)),
478 vm_map_round_page(addr + len,
479 vm_map_page_mask(map)),
480 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
481 }
482
483 int
vslock(user_addr_t addr,user_size_t len)484 vslock(
485 user_addr_t addr,
486 user_size_t len)
487 {
488 kern_return_t kret;
489 vm_map_t map;
490
491 map = current_map();
492 kret = vm_map_wire_kernel(map,
493 vm_map_trunc_page(addr,
494 vm_map_page_mask(map)),
495 vm_map_round_page(addr + len,
496 vm_map_page_mask(map)),
497 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
498 FALSE);
499
500 switch (kret) {
501 case KERN_SUCCESS:
502 return 0;
503 case KERN_INVALID_ADDRESS:
504 case KERN_NO_SPACE:
505 return ENOMEM;
506 case KERN_PROTECTION_FAILURE:
507 return EACCES;
508 default:
509 return EINVAL;
510 }
511 }
512
/*
 * Unwire the user address range [addr, addr+len) previously wired by
 * vslock().  "dirtied" is currently ignored (see FIXME blocks); the
 * intended behavior was to mark the covered pages modified first.
 * Returns 0 on success or a BSD errno translated from vm_map_unwire().
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	/* Disabled: walk the range and mark each resident page modified. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(map,
	    vm_map_trunc_page(addr,
	    vm_map_page_mask(map)),
	    vm_map_round_page(addr + len,
	    vm_map_page_mask(map)),
	    FALSE);
	/* Translate kern_return_t into errno (same mapping as vslock()). */
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
563
564 int
subyte(user_addr_t addr,int byte)565 subyte(
566 user_addr_t addr,
567 int byte)
568 {
569 char character;
570
571 character = (char)byte;
572 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
573 }
574
575 int
suibyte(user_addr_t addr,int byte)576 suibyte(
577 user_addr_t addr,
578 int byte)
579 {
580 char character;
581
582 character = (char)byte;
583 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
584 }
585
586 int
fubyte(user_addr_t addr)587 fubyte(user_addr_t addr)
588 {
589 unsigned char byte;
590
591 if (copyin(addr, (void *) &byte, sizeof(char))) {
592 return -1;
593 }
594 return byte;
595 }
596
597 int
fuibyte(user_addr_t addr)598 fuibyte(user_addr_t addr)
599 {
600 unsigned char byte;
601
602 if (copyin(addr, (void *) &(byte), sizeof(char))) {
603 return -1;
604 }
605 return byte;
606 }
607
608 int
suword(user_addr_t addr,long word)609 suword(
610 user_addr_t addr,
611 long word)
612 {
613 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
614 }
615
616 long
fuword(user_addr_t addr)617 fuword(user_addr_t addr)
618 {
619 long word = 0;
620
621 if (copyin(addr, (void *) &word, sizeof(int))) {
622 return -1;
623 }
624 return word;
625 }
626
627 /* suiword and fuiword are the same as suword and fuword, respectively */
628
629 int
suiword(user_addr_t addr,long word)630 suiword(
631 user_addr_t addr,
632 long word)
633 {
634 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
635 }
636
637 long
fuiword(user_addr_t addr)638 fuiword(user_addr_t addr)
639 {
640 long word = 0;
641
642 if (copyin(addr, (void *) &word, sizeof(int))) {
643 return -1;
644 }
645 return word;
646 }
647
648 /*
649 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
650 * fetching and setting of process-sized size_t and pointer values.
651 */
652 int
sulong(user_addr_t addr,int64_t word)653 sulong(user_addr_t addr, int64_t word)
654 {
655 if (IS_64BIT_PROCESS(current_proc())) {
656 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
657 } else {
658 return suiword(addr, (long)word);
659 }
660 }
661
662 int64_t
fulong(user_addr_t addr)663 fulong(user_addr_t addr)
664 {
665 int64_t longword;
666
667 if (IS_64BIT_PROCESS(current_proc())) {
668 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
669 return -1;
670 }
671 return longword;
672 } else {
673 return (int64_t)fuiword(addr);
674 }
675 }
676
677 int
suulong(user_addr_t addr,uint64_t uword)678 suulong(user_addr_t addr, uint64_t uword)
679 {
680 if (IS_64BIT_PROCESS(current_proc())) {
681 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
682 } else {
683 return suiword(addr, (uint32_t)uword);
684 }
685 }
686
687 uint64_t
fuulong(user_addr_t addr)688 fuulong(user_addr_t addr)
689 {
690 uint64_t ulongword;
691
692 if (IS_64BIT_PROCESS(current_proc())) {
693 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
694 return -1ULL;
695 }
696 return ulongword;
697 } else {
698 return (uint64_t)fuiword(addr);
699 }
700 }
701
702 int
swapon(__unused proc_t procp,__unused struct swapon_args * uap,__unused int * retval)703 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
704 {
705 return ENOTSUP;
706 }
707
/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *		KERN_FAILURE	Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t        t = args->t;
	user_addr_t             pid_addr  = args->pid;
	proc_t p;
	task_t          t1;
	int     pid = -1;
	kern_return_t   err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Resolve the port name to a task (name-port flavor is sufficient). */
	t1 = port_name_to_task_name(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else if (is_corpsetask(t1)) {
			/* Corpses have no proc; use the pid recorded in the task. */
			pid = task_pid(t1);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* Always copy a pid out (-1 on failure); errors here are ignored. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return err;
}
763
/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
/* Global task_for_pid policy; may be relaxed/tightened at boot or runtime. */
static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
771
/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred)) {
		return TRUE;
	}

	/* We're allowed to get our own task port */
	if (target == current_proc()) {
		return TRUE;
	}

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* Take a credential reference; released at "out". */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
850
/*
 *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 *	Description:	Waits for the user space daemon to respond to the request
 *			we made. Function declared non inline to be visible in
 *			stackshots and spindumps as well as debugging.
 *
 *	Returns the mach_msg status of the check_task_access upcall.
 */
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
864
/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is return no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t        target_tport = args->target_tport;
	int                     pid = args->pid;
	user_addr_t             task_addr = args->t;
	proc_t                  p = PROC_NULL;
	task_t                  t1 = TASK_NULL;
	task_t                  task = TASK_NULL;
	mach_port_name_t        tret = MACH_PORT_NULL;
	ipc_port_t              tfpport = MACH_PORT_NULL;
	void                    * sright = NULL;
	int                     error = 0;
	boolean_t               is_current_proc = FALSE;
	struct proc_ident       pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* The caller must present a valid (control) task port of its own. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture an identity snapshot before dropping the proc ref below. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	/* POSIX uid/gid checks (root, self, or matching credentials). */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task == TASK_NULL) {
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference as the proc ref
	 * shouldn't be held accross upcalls.
	 */
	task = p->task;
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
	if (error) {
		error = KERN_FAILURE;
		goto tfpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = KERN_PROTECTION_FAILURE;
			goto tfpout;
		}

		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

		if (error != MACH_MSG_SUCCESS) {
			if (error == MACH_RCV_INTERRUPTED) {
				error = KERN_ABORTED;
			} else {
				error = KERN_FAILURE;
			}
			goto tfpout;
		}
	}

	/* Grant task port access */
	extmod_statistics_incr_task_for_pid(task);

	/* this reference will be consumed during conversion */
	task_reference(task);
	if (task == current_task()) {
		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
		sright = (void *)convert_task_to_port_pinned(task);
	} else {
		sright = (void *)convert_task_to_port(task);
	}
	/* extra task ref consumed */

	/*
	 * Check if the task has been corpsified. We must do so after conversion
	 * since we don't hold locks and may have grabbed a corpse control port
	 * above which will prevent no-senders notification delivery.
	 */
	if (is_corpsetask(task)) {
		ipc_port_release_send(sright);
		error = KERN_FAILURE;
		goto tfpout;
	}

	/* Move the send right into the caller's IPC space; tret is its name. */
	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Always copy the (possibly NULL) port name out to the caller. */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	/* Release whatever references/ports this path accumulated. */
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1025
1026 /*
1027 * Routine: task_name_for_pid
1028 * Purpose:
1029 * Get the task name port for another "process", named by its
1030 * process ID on the same host as "target_task".
1031 *
1032 * Only permitted to privileged processes, or processes
1033 * with the same user ID.
1034 *
1035 * XXX This should be a BSD system call, not a Mach trap!!!
1036 */
1037
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/*
	 * Validate the caller-supplied task port.  On failure we still copy
	 * out MACH_PORT_NULL so the caller's buffer is always defined.
	 */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		/* Hold the target's credential for the uid/ruid checks below. */
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Permitted if the target is not a zombie AND the caller is the
		 * target itself, is the superuser, or matches both the effective
		 * and real uid of the target.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
			if (p->task != TASK_NULL) {
				struct proc_ident pident = proc_ident(p);

				task_t task = p->task;

				/*
				 * Take a task reference and drop the proc
				 * reference before the MAC upcall.
				 */
				task_reference(p->task);
				proc_rele(p);
				p = PROC_NULL;
#if CONFIG_MACF
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
				if (error) {
					task_deallocate(task);
					goto noperm;
				}
#endif
				/* Conversion consumes the task reference taken above. */
				sright = (void *)convert_task_name_to_port(task);
				task = NULL;
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else {
				tret = MACH_PORT_NULL;
			}

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Denied (or no such process): report MACH_PORT_NULL and fail. */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0) {
		kauth_cred_unref(&target_cred);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1122
1123 /*
1124 * Routine: task_inspect_for_pid
1125 * Purpose:
1126 * Get the task inspect port for another "process", named by its
1127 * process ID on the same host as "target_task".
1128 */
int
task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;

	proc_t proc = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_inspect_t task_insp = TASK_INSPECT_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	int error = 0;
	void *sright = NULL;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	/* Disallow inspect port for kernel_task */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		return EPERM;
	}

	/* Validate the caller-supplied task port. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		return EINVAL;
	}

	proc = proc_find(pid);
	if (proc == PROC_NULL) {
		error = ESRCH;
		goto tifpout;
	}
	/* Capture a stable identity; the proc ref is dropped before upcalls. */
	pident = proc_ident(proc);
	is_current_proc = (proc == current_proc());

	if (!(task_for_pid_posix_check(proc))) {
		error = EPERM;
		goto tifpout;
	}

	task_insp = proc->task;
	if (task_insp == TASK_INSPECT_NULL) {
		/* No task yet: returns 0 with MACH_PORT_NULL copied out. */
		goto tifpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task_reference(task_insp);

	proc_rele(proc);
	proc = PROC_NULL;

#if CONFIG_MACF
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
	if (error) {
		error = EPERM;
		goto tifpout;
	}
#endif

	/* If we aren't root and target's task access port is set... */
	if (!kauth_cred_issuser(kauth_cred_get()) &&
	    !is_current_proc &&
	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
	    (tfpport != IPC_PORT_NULL)) {
		if (tfpport == IPC_PORT_DEAD) {
			error = EACCES;
			goto tifpout;
		}


		/* Call up to the task access server */
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);

		if (error != MACH_MSG_SUCCESS) {
			/* Map the Mach messaging result onto BSD errnos. */
			if (error == MACH_RCV_INTERRUPTED) {
				error = EINTR;
			} else {
				error = EPERM;
			}
			goto tifpout;
		}
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task_insp)) {
		error = EACCES;
		goto tifpout;
	}

	/* could be IP_NULL, consumes a ref */
	sright = (void*) convert_task_inspect_to_port(task_insp);
	task_insp = TASK_INSPECT_NULL;
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));

tifpout:
	/* Common exit: drop every reference still held and copy out the name. */
	task_deallocate(t1);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (proc != PROC_NULL) {
		proc_rele(proc);
	}
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task_insp != TASK_INSPECT_NULL) {
		task_deallocate(task_insp);
	}

	*ret = error;
	return error;
}
1244
1245 /*
1246 * Routine: task_read_for_pid
1247 * Purpose:
1248 * Get the task read port for another "process", named by its
1249 * process ID on the same host as "target_task".
1250 */
1251 int
task_read_for_pid(struct proc * p __unused,struct task_read_for_pid_args * args,int * ret)1252 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1253 {
1254 mach_port_name_t target_tport = args->target_tport;
1255 int pid = args->pid;
1256 user_addr_t task_addr = args->t;
1257
1258 proc_t proc = PROC_NULL;
1259 task_t t1 = TASK_NULL;
1260 task_read_t task_read = TASK_READ_NULL;
1261 mach_port_name_t tret = MACH_PORT_NULL;
1262 ipc_port_t tfpport = MACH_PORT_NULL;
1263 int error = 0;
1264 void *sright = NULL;
1265 boolean_t is_current_proc = FALSE;
1266 struct proc_ident pident = {0};
1267
1268 /* Disallow read port for kernel_task */
1269 if (pid == 0) {
1270 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1271 return EPERM;
1272 }
1273
1274 t1 = port_name_to_task(target_tport);
1275 if (t1 == TASK_NULL) {
1276 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1277 return EINVAL;
1278 }
1279
1280 proc = proc_find(pid);
1281 if (proc == PROC_NULL) {
1282 error = ESRCH;
1283 goto trfpout;
1284 }
1285 pident = proc_ident(proc);
1286 is_current_proc = (proc == current_proc());
1287
1288 if (!(task_for_pid_posix_check(proc))) {
1289 error = EPERM;
1290 goto trfpout;
1291 }
1292
1293 task_read = proc->task;
1294 if (task_read == TASK_INSPECT_NULL) {
1295 goto trfpout;
1296 }
1297
1298 /*
1299 * Grab a task reference and drop the proc reference before making any upcalls.
1300 */
1301 task_reference(task_read);
1302
1303 proc_rele(proc);
1304 proc = PROC_NULL;
1305
1306 #if CONFIG_MACF
1307 error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1308 if (error) {
1309 error = EPERM;
1310 goto trfpout;
1311 }
1312 #endif
1313
1314 /* If we aren't root and target's task access port is set... */
1315 if (!kauth_cred_issuser(kauth_cred_get()) &&
1316 !is_current_proc &&
1317 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1318 (tfpport != IPC_PORT_NULL)) {
1319 if (tfpport == IPC_PORT_DEAD) {
1320 error = EACCES;
1321 goto trfpout;
1322 }
1323
1324
1325 /* Call up to the task access server */
1326 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
1327 proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1328
1329 if (error != MACH_MSG_SUCCESS) {
1330 if (error == MACH_RCV_INTERRUPTED) {
1331 error = EINTR;
1332 } else {
1333 error = EPERM;
1334 }
1335 goto trfpout;
1336 }
1337 }
1338
1339 /* Check if the task has been corpsified */
1340 if (is_corpsetask(task_read)) {
1341 error = EACCES;
1342 goto trfpout;
1343 }
1344
1345 /* could be IP_NULL, consumes a ref */
1346 sright = (void*) convert_task_read_to_port(task_read);
1347 task_read = TASK_READ_NULL;
1348 tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1349
1350 trfpout:
1351 task_deallocate(t1);
1352 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1353 if (proc != PROC_NULL) {
1354 proc_rele(proc);
1355 }
1356 if (tfpport != IPC_PORT_NULL) {
1357 ipc_port_release_send(tfpport);
1358 }
1359 if (task_read != TASK_READ_NULL) {
1360 task_deallocate(task_read);
1361 }
1362
1363 *ret = error;
1364 return error;
1365 }
1366
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* NOTE: declared kern_return_t but returns BSD errno values via *ret. */

	/* Never allow suspending the kernel (pid 0). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Posix check, or the suspend/resume entitlement, gates access. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = targetproc->task;
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				/* Map the Mach messaging result onto BSD errnos. */
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

	/* Hold the task across the suspend call. */
	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		/* Successful suspend: update memorystatus accounting. */
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1458
kern_return_t
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	task_t task = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport = MACH_PORT_NULL;
	ipc_port_t sright = NULL;
	int error = 0;
	boolean_t is_current_proc = FALSE;
	struct proc_ident pident = {0};

	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	/* Validate the caller-supplied task port. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return KERN_FAILURE;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}
	/* Capture a stable identity; the proc ref is dropped before upcalls. */
	pident = proc_ident(p);
	is_current_proc = (p == current_proc());

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task == TASK_NULL) {
		/* No task yet: success with MACH_PORT_NULL copied out. */
		error = KERN_SUCCESS;
		goto tfpout;
	}

	/*
	 * Grab a task reference and drop the proc reference before making any upcalls.
	 */
	task = p->task;
	task_reference(task);

	proc_rele(p);
	p = PROC_NULL;

	/* The debug-port entitlement bypasses the MACF and taskgated checks. */
	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    !is_current_proc &&
		    (task_get_task_access_port(task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}


			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				/* Map the Mach messaging result onto kern_return_t. */
				if (error == MACH_RCV_INTERRUPTED) {
					error = KERN_ABORTED;
				} else {
					error = KERN_FAILURE;
				}
				goto tfpout;
			}
		}
	}

	/* Check if the task has been corpsified */
	if (is_corpsetask(task)) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	error = task_get_debug_control_port(task, &sright);
	if (error != KERN_SUCCESS) {
		goto tfpout;
	}

	tret = ipc_port_copyout_send(
		sright,
		get_task_ipcspace(current_task()));

	error = KERN_SUCCESS;

tfpout:
	/* Common exit: drop every reference still held and copy out the name. */
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));

	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}
	if (task != TASK_NULL) {
		task_deallocate(task);
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}
	AUDIT_MACH_SYSCALL_EXIT(error);
	return error;
}
1593
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;
	mach_port_t tfpport = MACH_PORT_NULL;

	/* NOTE: declared kern_return_t but returns BSD errno values via *ret. */

	/* Never allow resuming the kernel (pid 0). */
	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	/* Posix check, or the suspend/resume entitlement, gates access. */
	if (!task_for_pid_posix_check(targetproc) &&
	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	target = targetproc->task;
#if XNU_TARGET_OS_OSX
	if (target != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {
			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);

			if (error != MACH_MSG_SUCCESS) {
				/* Map the Mach messaging result onto BSD errnos. */
				if (error == MACH_RCV_INTERRUPTED) {
					error = EINTR;
				} else {
					error = EPERM;
				}
				goto out;
			}
		}
	}
#endif /* XNU_TARGET_OS_OSX */

#if !XNU_TARGET_OS_OSX
#if SOCKETS
	/* Embedded platforms also un-defunct the target's sockets on resume. */
	resume_proc_sockets(targetproc);
#endif /* SOCKETS */
#endif /* !XNU_TARGET_OS_OSX */

	/* Hold the task across the resume call. */
	task_reference(target);

#if CONFIG_MEMORYSTATUS
	/* Update memorystatus accounting before waking the task. */
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				/* Resume failed for memory reasons: kill the target. */
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else {
				error = EPERM;
			}
		}
	}

	task_deallocate(target);

out:
	if (tfpport != IPC_PORT_NULL) {
		ipc_port_release_send(tfpport);
	}

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}

	*ret = error;
	return error;
}
1697
1698 #if !XNU_TARGET_OS_OSX
1699 /*
1700 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1701 * When a process is specified, this call is blocking, otherwise we wake up the
1702 * freezer thread and do not block on a process being frozen.
1703 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;

#ifndef CONFIG_FREEZE
	/* Freezer not configured: fall through and return 0 without acting. */
	#pragma unused(pid)
#else

	/*
	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
	 */

	if (pid >= 0) {
		targetproc = proc_find(pid);

		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

#if CONFIG_MACF
	/* Note that targetproc may be null (when pid < 0). */
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * pid == -2: page out anonymous memory globally;
	 * pid == -1: wake the freezer thread (non-blocking);
	 * pid >= 0: freeze the named process synchronously.
	 */
	if (pid == -2) {
		vm_pageout_anonymous_pages();
	} else if (pid == -1) {
		memorystatus_on_inactivity(targetproc);
	} else {
		error = memorystatus_freeze_process_sync(targetproc);
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL) {
		proc_rele(targetproc);
	}
	*ret = error;
	return error;
}
1760 #endif /* !XNU_TARGET_OS_OSX */
1761
1762 #if SOCKETS
1763 int
networking_memstatus_callout(proc_t p,uint32_t status)1764 networking_memstatus_callout(proc_t p, uint32_t status)
1765 {
1766 struct fileproc *fp;
1767
1768 /*
1769 * proc list lock NOT held
1770 * proc lock NOT held
1771 * a reference on the proc has been held / shall be dropped by the caller.
1772 */
1773 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1774 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1775
1776 proc_fdlock(p);
1777
1778 fdt_foreach(fp, p) {
1779 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1780 #if NECP
1781 case DTYPE_NETPOLICY:
1782 necp_fd_memstatus(p, status,
1783 (struct necp_fd_data *)fp_get_data(fp));
1784 break;
1785 #endif /* NECP */
1786 #if SKYWALK
1787 case DTYPE_CHANNEL:
1788 kern_channel_memstatus(p, status,
1789 (struct kern_channel *)fp_get_data(fp));
1790 break;
1791 #endif /* SKYWALK */
1792 default:
1793 break;
1794 }
1795 }
1796 proc_fdunlock(p);
1797
1798 return 1;
1799 }
1800
1801 #if SKYWALK
1802 /*
1803 * Since we make multiple passes across the fileproc array, record the
1804 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
 * large enough to accommodate most, if not all cases. If we find more,
1806 * we'll go to the slow path during second pass.
1807 */
1808 #define MAX_CHANNELS 8 /* should be more than enough */
1809 #endif /* SKYWALK */
1810
static int
networking_defunct_callout(proc_t p, void *arg)
{
	struct pid_shutdown_sockets_args *args = arg;
	int pid = args->pid;
	int level = args->level;
	struct fileproc *fp;
#if SKYWALK
	int i;
	int channel_count = 0;
	struct kern_channel *channel_array[MAX_CHANNELS];

	/* Fast-path cache of the first MAX_CHANNELS channels found. */
	bzero(&channel_array, sizeof(channel_array));
#endif /* SKYWALK */

	proc_fdlock(p);

	/* First pass over the file table: sockets and NECP fds are defuncted
	 * immediately; channels are only counted/recorded so they can be
	 * defuncted AFTER NECP in the second pass below. */
	fdt_foreach(fp, p) {
		struct fileglob *fg = fp->fp_glob;

		switch (FILEGLOB_DTYPE(fg)) {
		case DTYPE_SOCKET: {
			struct socket *so = (struct socket *)fg_get_data(fg);
			/* Match by owner pid, last-user pid, or delegated pid. */
			if (proc_getpid(p) == pid || so->last_pid == pid ||
			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
				/* Call networking stack with socket and level */
				(void)socket_defunct(p, so, level);
			}
			break;
		}
#if NECP
		case DTYPE_NETPOLICY:
			/* first pass: defunct necp and get stats for ntstat */
			if (proc_getpid(p) == pid) {
				necp_fd_defunct(p,
				    (struct necp_fd_data *)fg_get_data(fg));
			}
			break;
#endif /* NECP */
#if SKYWALK
		case DTYPE_CHANNEL:
			/* first pass: get channels and total count */
			if (proc_getpid(p) == pid) {
				if (channel_count < MAX_CHANNELS) {
					channel_array[channel_count] =
					    (struct kern_channel *)fg_get_data(fg);
				}
				/* count keeps growing even past MAX_CHANNELS */
				++channel_count;
			}
			break;
#endif /* SKYWALK */
		default:
			break;
		}
	}

#if SKYWALK
	/*
	 * Second pass: defunct channels/flows (after NECP). Handle
	 * the common case of up to MAX_CHANNELS count with fast path,
	 * and traverse the fileproc array again only if we exceed it.
	 */
	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
		ASSERT(proc_getpid(p) == pid);
		for (i = 0; i < channel_count; i++) {
			ASSERT(channel_array[i] != NULL);
			kern_channel_defunct(p, channel_array[i]);
		}
	} else if (channel_count != 0) {
		/* Slow path: more channels than the cache holds; re-walk. */
		ASSERT(proc_getpid(p) == pid);
		fdt_foreach(fp, p) {
			struct fileglob *fg = fp->fp_glob;

			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
				kern_channel_defunct(p,
				    (struct kern_channel *)fg_get_data(fg));
			}
		}
	}
#endif /* SKYWALK */
	proc_fdunlock(p);

	return PROC_RETURNED;
}
1895
1896 int
pid_shutdown_sockets(struct proc * p __unused,struct pid_shutdown_sockets_args * args,int * ret)1897 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1898 {
1899 int error = 0;
1900 proc_t targetproc = PROC_NULL;
1901 int pid = args->pid;
1902 int level = args->level;
1903
1904 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1905 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1906 error = EINVAL;
1907 goto out;
1908 }
1909
1910 targetproc = proc_find(pid);
1911 if (targetproc == PROC_NULL) {
1912 error = ESRCH;
1913 goto out;
1914 }
1915
1916 if (!task_for_pid_posix_check(targetproc) &&
1917 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1918 error = EPERM;
1919 goto out;
1920 }
1921
1922 #if CONFIG_MACF
1923 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1924 if (error) {
1925 error = EPERM;
1926 goto out;
1927 }
1928 #endif
1929
1930 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
1931 networking_defunct_callout, args, NULL, NULL);
1932
1933 out:
1934 if (targetproc != PROC_NULL) {
1935 proc_rele(targetproc);
1936 }
1937 *ret = error;
1938 return error;
1939 }
1940
1941 #endif /* SOCKETS */
1942
1943 static int
sysctl_settfp_policy(__unused struct sysctl_oid * oidp,void * arg1,__unused int arg2,struct sysctl_req * req)1944 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1945 __unused int arg2, struct sysctl_req *req)
1946 {
1947 int error = 0;
1948 int new_value;
1949
1950 error = SYSCTL_OUT(req, arg1, sizeof(int));
1951 if (error || req->newptr == USER_ADDR_NULL) {
1952 return error;
1953 }
1954
1955 if (!kauth_cred_issuser(kauth_cred_get())) {
1956 return EPERM;
1957 }
1958
1959 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1960 goto out;
1961 }
1962 if ((new_value == KERN_TFP_POLICY_DENY)
1963 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
1964 tfp_policy = new_value;
1965 } else {
1966 error = EINVAL;
1967 }
1968 out:
1969 return error;
1970 }
1971
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;      /* built with SECURE_KERNEL */
#else
static int kern_secure_kernel = 0;
#endif

/* kern.secure_kernel: read-only; reports whether this is a SECURE_KERNEL build */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy knob, validated by sysctl_settfp_policy() */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

/* vm.shared_region_*: shared-region trace level, version, and persistence knobs */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
1990
1991 /*
1992 * shared_region_check_np:
1993 *
1994 * This system call is intended for dyld.
1995 *
1996 * dyld calls this when any process starts to see if the process's shared
1997 * region is already set up and ready to use.
1998 * This call returns the base address of the first mapping in the
1999 * process's shared region's first mapping.
2000 * dyld will then check what's mapped at that address.
2001 *
2002 * If the shared region is empty, dyld will then attempt to map the shared
2003 * cache file in the shared region via the shared_region_map_np() system call.
2004 *
2005 * If something's already mapped in the shared region, dyld will check if it
2006 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
2008 * shared region.
2009 * If it doesn't match, dyld will unmap the shared region and map the shared
2010 * cache into the process's address space via mmap().
2011 *
2012 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2013 * the shared region. We will remove the shared_region reference from the task.
2014 *
2015 * ERROR VALUES
2016 * EINVAL no shared region
2017 * ENOMEM shared region is empty
2018 * EFAULT bad address for "start_address"
2019 */
int
shared_region_check_np(
	__unused struct proc *p,
	struct shared_region_check_np_args *uap,
	__unused int *retvalp)
{
	vm_shared_region_t shared_region;
	mach_vm_offset_t start_address = 0;
	int error = 0;
	kern_return_t kr;
	task_t task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/* retrieve the current tasks's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				/* empty shared region */
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						/* bad address for "start_address" */
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		/* drop the reference taken by vm_shared_region_get() */
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
2107
2108
2109 static int
shared_region_copyin(struct proc * p,user_addr_t user_addr,unsigned int count,unsigned int element_size,void * kernel_data)2110 shared_region_copyin(
2111 struct proc *p,
2112 user_addr_t user_addr,
2113 unsigned int count,
2114 unsigned int element_size,
2115 void *kernel_data)
2116 {
2117 int error = 0;
2118 vm_size_t size = count * element_size;
2119
2120 error = copyin(user_addr, kernel_data, size);
2121 if (error) {
2122 SHARED_REGION_TRACE_ERROR(
2123 ("shared_region: %p [%d(%s)] map(): "
2124 "copyin(0x%llx, %ld) failed (error=%d)\n",
2125 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2126 proc_getpid(p), p->p_comm,
2127 (uint64_t)user_addr, (long)size, error));
2128 }
2129 return error;
2130 }
2131
2132 /*
2133 * A reasonable upper limit to prevent overflow of allocation/copyin.
2134 */
2135 #define _SR_FILE_MAPPINGS_MAX_FILES 256
2136
2137 /* forward declaration */
2138 __attribute__((noinline))
2139 static void shared_region_map_and_slide_cleanup(
2140 struct proc *p,
2141 uint32_t files_count,
2142 struct _sr_file_mappings *sr_file_mappings,
2143 struct vm_shared_region *shared_region,
2144 struct vnode *scdir_vp);
2145
2146 /*
2147 * Setup part of _shared_region_map_and_slide().
2148 * It had to be broken out of _shared_region_map_and_slide() to
2149 * prevent compiler inlining from blowing out the stack.
2150 */
/*
 * Setup part of _shared_region_map_and_slide():
 *  - validate the files/mappings counts and convert the flat arrays
 *    into a per-file array of _sr_file_mappings,
 *  - if scdir_enforce, look up the expected shared cache directory,
 *  - grab (and trim) the calling task's shared region,
 *  - for each file: take fileproc + vnode references and vet the file
 *    (regular file, MACF approval, root-owned or in the static trust
 *    cache, SIP-protected or on the root volume, in the right directory,
 *    code-signed mappings), and grab its size and memory object.
 *
 * On success, *sr_file_mappings, *shared_region_ptr and (possibly)
 * *scdir_vp hold references the caller must release via
 * shared_region_map_and_slide_cleanup().  On failure, this routine does
 * that cleanup itself, NULLs the output pointers, and returns an errno.
 */
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
	struct proc *p,
	uint32_t files_count,
	struct shared_file_np *files,
	uint32_t mappings_count,
	struct shared_file_mapping_slide_np *mappings,
	struct _sr_file_mappings **sr_file_mappings,
	struct vm_shared_region **shared_region_ptr,
	struct vnode **scdir_vp,
	struct vnode *rdir_vp)
{
	int error = 0;
	struct _sr_file_mappings *srfmp;
	uint32_t mappings_next;
	struct vnode_attr va;
	off_t fs;
#if CONFIG_MACF
	vm_prot_t maxprot = VM_PROT_ALL;
#endif
	uint32_t i;
	struct vm_shared_region *shared_region = NULL;
	/* DriverKit processes use a separate shared cache directory. */
	boolean_t is_driverkit = task_is_driver(current_task());
	const char *expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	/* Bound the kalloc below and reject degenerate requests. */
	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
		error = E2BIG;
		goto done;
	}
	if (files_count == 0) {
		error = EINVAL;
		goto done;
	}
	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
	    Z_WAITOK | Z_ZERO);
	if (*sr_file_mappings == NULL) {
		error = ENOMEM;
		goto done;
	}

	/*
	 * Partition the flat "mappings" array among the files, verifying
	 * the per-file counts never run past mappings_count.
	 *
	 * NOTE(review): "mappings_next +=" is 32-bit and could in theory
	 * wrap for huge sf_mappings_count values; callers appear to bound
	 * the totals (SFM_MAX and the fix-up loop in
	 * shared_region_map_and_slide_2_np) — confirm wrap is unreachable.
	 */
	mappings_next = 0;
	for (i = 0; i < files_count; i++) {
		srfmp = &(*sr_file_mappings)[i];
		srfmp->fd = files[i].sf_fd;
		srfmp->mappings_count = files[i].sf_mappings_count;
		srfmp->mappings = &mappings[mappings_next];
		mappings_next += srfmp->mappings_count;
		if (mappings_next > mappings_count) {
			error = EINVAL;
			goto done;
		}
		srfmp->slide = files[i].sf_slide;
	}

	if (scdir_enforce) {
		/* get vnode for expected_scdir_path */
		error = vnode_lookup(expected_scdir_path, 0, scdir_vp, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)]: "
				"vnode_lookup(%s) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				expected_scdir_path, error));
			goto done;
		}
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_trim_and_get(current_task());
	*shared_region_ptr = shared_region;
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"no shared region\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		error = EINVAL;
		goto done;
	}

	/*
	 * Check the shared region matches the current root
	 * directory of this process.  Deny the mapping to
	 * avoid tainting the shared region with something that
	 * doesn't quite belong into it.
	 */
	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
	if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: map(%p) root_dir mismatch\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread())));
		error = EPERM;
		goto done;
	}


	/* Vet each file and collect its vnode, size and memory object. */
	for (srfmp = &(*sr_file_mappings)[0];
	    srfmp < &(*sr_file_mappings)[files_count];
	    srfmp++) {
		if (srfmp->mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/* get file structure from file descriptor */
		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d lookup failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}

		/* we need at least read permission on the file */
		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d not readable\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd));
			error = EPERM;
			goto done;
		}

		/* get vnode from file structure */
		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d getwithref failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}
		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);

		/* make sure the vnode is a regular file */
		if (srfmp->vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not a file (type=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, srfmp->vp->v_type));
			error = EINVAL;
			goto done;
		}

#if CONFIG_MACF
		/* pass in 0 for the offset argument because AMFI does not need the offset
		 * of the shared cache */
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
		if (error) {
			goto done;
		}
#endif /* MAC */

#if XNU_TARGET_OS_OSX && defined(__arm64__)
		/*
		 * Check if the shared cache is in the trust cache;
		 * if so, we can skip the root ownership check.
		 */
#if DEVELOPMENT || DEBUG
		/*
		 * Skip both root ownership and trust cache check if
		 * enforcement is disabled.
		 */
		if (!cs_system_enforcement()) {
			goto after_root_check;
		}
#endif /* DEVELOPMENT || DEBUG */
		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
		if (blob == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing CS blob\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		const uint8_t *cdhash = csblob_get_cdhash(blob);
		if (cdhash == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing cdhash\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		uint32_t result = pmap_lookup_in_static_trust_cache(cdhash);
		boolean_t in_trust_cache = result & (TC_LOOKUP_FOUND << TC_LOOKUP_RESULT_SHIFT);
		if (!in_trust_cache) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not in trust cache\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		/* trusted: no need to also require root ownership */
		goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

		/* The shared cache file must be owned by root */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_getattr(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				error));
			goto done;
		}
		if (va.va_uid != 0) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"owned by uid=%d instead of 0\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, va.va_uid));
			error = EPERM;
			goto done;
		}

#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
			/* SIP is enabled: the file must be SIP-protected */
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_flags);
			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"vnode_getattr(%p) failed (error=%d)\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					error));
				goto done;
			}

			if (!(va.va_flags & SF_RESTRICTED)) {
				/*
				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
				 * the shared cache file is NOT SIP-protected, so reject the
				 * mapping request
				 */
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'), "
					"vnode is not SIP-protected. \n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				goto done;
			}
		}
#else /* CONFIG_CSR */
		/* Devices without SIP/ROSP need to make sure that the shared cache is on the root volume. */

		assert(rdir_vp != NULL);
		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not on process's root volume\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EPERM;
			goto done;
		}
#endif /* CONFIG_CSR */

		if (scdir_enforce) {
			/* ensure parent is scdir_vp */
			assert(*scdir_vp != NULL);
			if (vnode_parent(srfmp->vp) != *scdir_vp) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"shared cache file not in %s\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name, expected_scdir_path));
				error = EPERM;
				goto done;
			}
		}

		/* get vnode size */
		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_size(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
			goto done;
		}
		srfmp->file_size = fs;

		/* get the file's memory object handle */
		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"no memory object\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EINVAL;
			goto done;
		}

		/* check that the mappings are properly covered by code signatures */
		if (!cs_system_enforcement()) {
			/* code signing is not enforced: no need to check */
		} else {
			for (i = 0; i < srfmp->mappings_count; i++) {
				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
					/* zero-filled mapping: not backed by the file */
					continue;
				}
				if (ubc_cs_is_range_codesigned(srfmp->vp,
				    srfmp->mappings[i].sms_file_offset,
				    srfmp->mappings[i].sms_size)) {
					/* this mapping is fully covered by code signatures */
					continue;
				}
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
					"is not code-signed\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					i, srfmp->mappings_count,
					srfmp->mappings[i].sms_address,
					srfmp->mappings[i].sms_size,
					srfmp->mappings[i].sms_file_offset,
					srfmp->mappings[i].sms_max_prot,
					srfmp->mappings[i].sms_init_prot));
				error = EINVAL;
				goto done;
			}
		}
	}
done:
	/* On failure, release everything we accumulated and NULL the outputs. */
	if (error != 0) {
		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region, *scdir_vp);
		*sr_file_mappings = NULL;
		*shared_region_ptr = NULL;
		*scdir_vp = NULL;
	}
	return error;
}
2542
2543 /*
2544 * shared_region_map_np()
2545 *
2546 * This system call is intended for dyld.
2547 *
2548 * dyld uses this to map a shared cache file into a shared region.
2549 * This is usually done only the first time a shared cache is needed.
2550 * Subsequent processes will just use the populated shared region without
2551 * requiring any further setup.
2552 */
/*
 * Map the given files' mappings into the calling task's shared region.
 *
 * Takes a reference on the process's root directory vnode first (to
 * prevent racing with chroot), runs the setup/validation pass, performs
 * the actual mapping via vm_shared_region_map_file(), translates the
 * kern_return_t to an errno, and releases all references taken by setup.
 *
 * Returns 0 on success or an errno.
 */
static int
_shared_region_map_and_slide(
	struct proc *p,
	uint32_t files_count,
	struct shared_file_np *files,
	uint32_t mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int error = 0;
	kern_return_t kr = KERN_SUCCESS;
	struct _sr_file_mappings *sr_file_mappings = NULL;
	struct vnode *scdir_vp = NULL;
	struct vnode *rdir_vp = NULL;
	struct vm_shared_region *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		/* not chroot'ed: use the system root */
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 * On failure, setup has already done its own cleanup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, &scdir_vp, rdir_vp);
	if (error != 0) {
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		/* atomically clear P_NOSHLIB without taking the proc lock */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	/* drop the root dir reference and everything setup acquired */
	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region, scdir_vp);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
2639
2640 /*
2641 * Clean up part of _shared_region_map_and_slide()
2642 * It had to be broken out of _shared_region_map_and_slide() to
2643 * prevent compiler inlining from blowing out the stack.
2644 */
/*
 * Clean up part of _shared_region_map_and_slide()
 * It had to be broken out of _shared_region_map_and_slide() to
 * prevent compiler inlining from blowing out the stack.
 *
 * Releases everything shared_region_map_and_slide_setup() acquired:
 * for each file, marks the vnode (and any resource-fork named stream)
 * VSHARED_DYLD, refreshes its access time, and drops the vnode and
 * fileproc references; then frees the sr_file_mappings array and drops
 * the scdir vnode and shared region references.  All arguments other
 * than "p" and "files_count" may be NULL.
 */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region,
	struct vnode *scdir_vp)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				/* flag the vnode as backing a shared cache */
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated for
				 * for it. That namedstream vnode will also
				 * have to be marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (scdir_vp != NULL) {
		(void)vnode_put(scdir_vp);
		scdir_vp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}
}
2715
2716
2717 /*
2718 * For each file mapped, we may have mappings for:
2719 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2720 * so let's round up to 8 mappings per file.
2721 */
2722 #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
2723
2724 /*
2725 * This is the older interface that dyld uses to map in the shared
2726 * library. dyld is slowly moving to the new shared_region_map_and_slide_2_np()
2727 * call as needed.
2728 */
/*
 * shared_region_map_and_slide_np() system call (legacy interface).
 *
 * Copies in "uap->count" legacy shared_file_mapping_np entries for a
 * single file descriptor, translates them to the newer
 * shared_file_mapping_slide_np format, and hands them to
 * _shared_region_map_and_slide().  Unlike the "_2" variant, the slide
 * value here is used as-is (it is validated, not randomized).
 *
 * NOTE(review): "kr" holds kern_return_t values in some paths (e.g.
 * KERN_RESOURCE_SHORTAGE) and errno values in others (copyin and
 * _shared_region_map_and_slide return errnos), and is returned directly
 * as the syscall result — confirm callers tolerate this mix.
 */
int
shared_region_map_and_slide_np(
	struct proc *p,
	struct shared_region_map_and_slide_np_args *uap,
	__unused int *retvalp)
{
	unsigned int mappings_count = uap->count;
	unsigned int m;
	uint32_t slide = uap->slide;
	struct shared_file_np shared_files[1];
	struct shared_file_mapping_np legacy_mapping;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	/* validate the mapping count and allocate the translation buffer */
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * Read in the mappings and translate to new format.
	 */
	for (m = 0; m < mappings_count; ++m) {
		user_addr_t from_uaddr = uap->mappings + (m * sizeof(struct shared_file_mapping_np));
		kr = shared_region_copyin(p, from_uaddr, 1, sizeof(legacy_mapping), &legacy_mapping);
		if (kr != 0) {
			goto done;
		}
		mappings[m].sms_address = legacy_mapping.sfm_address;
		mappings[m].sms_size = legacy_mapping.sfm_size;
		mappings[m].sms_file_offset = legacy_mapping.sfm_file_offset;
		mappings[m].sms_max_prot = legacy_mapping.sfm_max_prot;
		mappings[m].sms_init_prot = legacy_mapping.sfm_init_prot;
		/* every mapping shares the single caller-supplied slide info */
		mappings[m].sms_slide_size = uap->slide_size;
		mappings[m].sms_slide_start = uap->slide_start;
	}

	/* the legacy call always maps exactly one file */
	bzero(shared_files, sizeof(shared_files));
	shared_files[0].sf_fd = uap->fd;
	shared_files[0].sf_mappings_count = mappings_count;
	shared_files[0].sf_slide = slide;

	kr = _shared_region_map_and_slide(p,
	    1,                  /* # of files to map */
	    &shared_files[0],   /* files to map */
	    mappings_count,
	    mappings);

done:
	/* kfree_data() tolerates a NULL pointer */
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2813
2814 /*
2815 * This is the new interface for setting up shared region mappings.
2816 *
2817 * The slide used for shared regions setup using this interface is done differently
2818 * from the old interface. The slide value passed in the shared_files_np represents
2819 * a max value. The kernel will choose a random value based on that, then use it
2820 * for all shared regions.
2821 */
2822 #if defined (__x86_64__)
2823 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2824 #else
2825 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2826 #endif
2827
/*
 * shared_region_map_and_slide_2_np() system call (current interface).
 *
 * Copies in per-file descriptors (shared_file_np) and a flat array of
 * mappings (shared_file_mapping_slide_np).  The first file's sf_slide
 * is interpreted as a *maximum* slide; the kernel picks a random,
 * page-mask-aligned slide below it, applies it to every mapping, and
 * then calls _shared_region_map_and_slide().
 *
 * Returns 0 / errno from _shared_region_map_and_slide(), or a
 * kern_return_t code on early validation failures (same kr/errno mix as
 * the legacy call).
 */
int
shared_region_map_and_slide_2_np(
	struct proc *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int *retvalp)
{
	unsigned int files_count;
	struct shared_file_np *shared_files = NULL;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	/* validate counts and allocate the copyin buffers */
	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	/*
	 * Pick a random slide in [0, max_slide), aligned down to the
	 * platform page size via SLIDE_AMOUNT_MASK.
	 */
	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	/* allow ASLR to be disabled via boot-arg on dev/debug kernels */
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 * "m" walks the flat mappings array across all files; the bound
	 * check also validates that the per-file sf_mappings_count values
	 * don't claim more mappings than were copied in.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			mappings[m].sms_address += slide_amount;
			if (mappings[m].sms_slide_size != 0) {
				mappings[m].sms_slide_start += slide_amount;
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	/* kfree_data() tolerates NULL pointers */
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));
	return kr;
}
2951
/* sysctl overflow room */

/* vm.pagesize: the VM page size in bytes (read-only) */
SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");

/* vm.memory_pressure: pageout daemon's memory pressure indicator */
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
2966
2967 static int
2968 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
2969 {
2970 #pragma unused(oidp, arg1, arg2)
2971 unsigned int page_free_wanted;
2972
2973 page_free_wanted = mach_vm_ctl_page_free_wanted();
2974 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
2975 }
2976 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
2977 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
2978 0, 0, vm_ctl_page_free_wanted, "I", "");
2979
2980 extern unsigned int vm_page_purgeable_count;
2981 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2982 &vm_page_purgeable_count, 0, "Purgeable page count");
2983
2984 extern unsigned int vm_page_purgeable_wired_count;
2985 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2986 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
2987
2988 extern unsigned int vm_page_kern_lpage_count;
2989 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2990 &vm_page_kern_lpage_count, 0, "kernel used large pages");
2991
2992 #if DEVELOPMENT || DEBUG
2993 #if __ARM_MIXED_PAGE_SIZE__
2994 static int vm_mixed_pagesize_supported = 1;
2995 #else
2996 static int vm_mixed_pagesize_supported = 0;
2997 #endif /*__ARM_MIXED_PAGE_SIZE__ */
2998 SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
2999 &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
3000
3001 SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
3002 SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
3003 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
3004 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
3005
3006 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
3007 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
3008 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
3009 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
3010 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
3011 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
3012
3013 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3014 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
3015 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3016 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
3017 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3018 &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
3019 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3020 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
3021 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3022 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
3023 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3024 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
3025 #endif /* DEVELOPMENT || DEBUG */
3026
extern int madvise_free_debug;
/* Debug knob: when non-zero, madvise(MADV_FREE*) zero-fills the pages it frees. */
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

/* Counters for the VM "reusable pages" mechanism (vm_page_stats_reusable). */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");


/* Global page-queue sizes. */
extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
/* NOTE(review): pageout_speculative_clean and pageout_freed_speculative below export the same counter (legacy name kept for compatibility, presumably). */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");


/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");

#if defined (__x86_64__)
/* Free-page "clump" allocator statistics (x86_64 only). */
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
/* Histogram of allocations by clump size, indexed 1..16. */
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */

#if CONFIG_SECLUDED_MEMORY

/* Secluded-memory pool sizes and grab statistics. */
SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);
3152
3153 void
vm_pageout_io_throttle(void)3154 vm_pageout_io_throttle(void)
3155 {
3156 struct uthread *uthread = current_uthread();
3157
3158 /*
3159 * thread is marked as a low priority I/O type
3160 * and the I/O we issued while in this cleaning operation
3161 * collided with normal I/O operations... we'll
3162 * delay in order to mitigate the impact of this
3163 * task on the normal operation of the system
3164 */
3165
3166 if (uthread->uu_lowpri_window) {
3167 throttle_lowpri_io(1);
3168 }
3169 }
3170
3171 int
vm_pressure_monitor(__unused struct proc * p,struct vm_pressure_monitor_args * uap,int * retval)3172 vm_pressure_monitor(
3173 __unused struct proc *p,
3174 struct vm_pressure_monitor_args *uap,
3175 int *retval)
3176 {
3177 kern_return_t kr;
3178 uint32_t pages_reclaimed;
3179 uint32_t pages_wanted;
3180
3181 kr = mach_vm_pressure_monitor(
3182 (boolean_t) uap->wait_for_pressure,
3183 uap->nsecs_monitored,
3184 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3185 &pages_wanted);
3186
3187 switch (kr) {
3188 case KERN_SUCCESS:
3189 break;
3190 case KERN_ABORTED:
3191 return EINTR;
3192 default:
3193 return EINVAL;
3194 }
3195
3196 if (uap->pages_reclaimed) {
3197 if (copyout((void *)&pages_reclaimed,
3198 uap->pages_reclaimed,
3199 sizeof(pages_reclaimed)) != 0) {
3200 return EFAULT;
3201 }
3202 }
3203
3204 *retval = (int) pages_wanted;
3205 return 0;
3206 }
3207
/*
 * kas_info() system call: export kernel address-space layout information
 * (KASLR text slide, per-load-command segment base addresses) to
 * privileged user space.
 *
 * Returns 0 on success, or:
 *   ENOTSUP  kernel not built with CONFIG_KAS_INFO
 *   EPERM    caller is not superuser
 *   EINVAL   unknown selector, or user buffer size mismatch
 *   other    errors from the MAC check, copyin, or copyout
 */
int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifndef CONFIG_KAS_INFO
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* CONFIG_KAS_INFO */
	int selector = uap->selector;
	user_addr_t valuep = uap->value;
	user_addr_t sizep = uap->size;
	user_size_t size, rsize;
	int error;

	/* only the superuser may learn the kernel's layout */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	/* read the caller's buffer size, using the size type of the caller's ABI */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		/* caller's buffer must be exactly one uint64_t */
		if (sizeof(slide) != size) {
			return EINVAL;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
		rsize = size;
	}
	break;
	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
	{
		uint32_t i;
		kernel_mach_header_t *mh = &_mh_execute_header;
		struct load_command *cmd;
		cmd = (struct load_command*) &mh[1];
		uint64_t *bases;
		/* one uint64_t slot per load command, not per kernel segment */
		rsize = mh->ncmds * sizeof(uint64_t);

		/*
		 * Return the size if no data was passed
		 */
		if (valuep == 0) {
			break;
		}

		if (rsize > size) {
			return EINVAL;
		}

		/* Z_ZERO: slots for non-LC_SEGMENT_KERNEL commands stay 0 */
		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);

		for (i = 0; i < mh->ncmds; i++) {
			if (cmd->cmd == LC_SEGMENT_KERNEL) {
				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
				bases[i] = (uint64_t)sg->vmaddr;
			}
			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
		}

		error = copyout(bases, valuep, rsize);

		kfree_data(bases, rsize);

		if (error) {
			return error;
		}
	}
	break;
	default:
		return EINVAL;
	}

	/* write back the result size in the size type of the caller's ABI */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64 = (user64_size_t)rsize;
		error = copyout(&size64, sizep, sizeof(size64));
	} else {
		user32_size_t size32 = (user32_size_t)rsize;
		error = copyout(&size32, sizep, sizeof(size32));
	}

	return error;
#endif /* CONFIG_KAS_INFO */
}
3318
3319 #if __has_feature(ptrauth_calls)
3320 /*
3321 * Generate a random pointer signing key that isn't 0.
3322 */
3323 uint64_t
generate_jop_key(void)3324 generate_jop_key(void)
3325 {
3326 uint64_t key;
3327
3328 do {
3329 read_random(&key, sizeof key);
3330 } while (key == 0);
3331 return key;
3332 }
3333 #endif /* __has_feature(ptrauth_calls) */
3334
3335
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#pragma clang diagnostic ignored "-Wunused-function"

/*
 * Compile-time check that the (unsigned long *) casts in the two
 * SYSCTL_ULONG declarations below are size-correct.
 */
static void
asserts()
{
	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}

SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

/* UPL/IOPL pages found modified when they should not have been. */
extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");

#if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3368 static int
3369 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3370 {
3371 #pragma unused(oidp, arg1, arg2)
3372 int error = 0;
3373 int new_value;
3374
3375 if (req->newptr == USER_ADDR_NULL) {
3376 return 0;
3377 }
3378 error = SYSCTL_IN(req, &new_value, sizeof(int));
3379 if (error) {
3380 return error;
3381 }
3382 if (!vm_footprint_suspend_allowed) {
3383 if (new_value != 0) {
3384 /* suspends are not allowed... */
3385 return 0;
3386 }
3387 /* ... but let resumes proceed */
3388 }
3389 DTRACE_VM2(footprint_suspend,
3390 vm_map_t, current_map(),
3391 int, new_value);
3392
3393 pmap_footprint_suspend(current_map(), new_value);
3394
3395 return 0;
3396 }
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */

/* Statistics for the compact "corpse footprint" records kept for crashed tasks. */
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");


/* Shared-region pager activity counters. */
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */


/* vm_map_lookup_locked() copy-strategy statistics. */
extern uint64_t vm_map_lookup_locked_copy_slowly_count;
extern uint64_t vm_map_lookup_locked_copy_slowly_size;
extern uint64_t vm_map_lookup_locked_copy_slowly_max;
extern uint64_t vm_map_lookup_locked_copy_slowly_restart;
extern uint64_t vm_map_lookup_locked_copy_slowly_error;
extern uint64_t vm_map_lookup_locked_copy_strategically_count;
extern uint64_t vm_map_lookup_locked_copy_strategically_size;
extern uint64_t vm_map_lookup_locked_copy_strategically_max;
extern uint64_t vm_map_lookup_locked_copy_strategically_restart;
extern uint64_t vm_map_lookup_locked_copy_strategically_error;
extern uint64_t vm_map_lookup_locked_copy_shadow_count;
extern uint64_t vm_map_lookup_locked_copy_shadow_size;
extern uint64_t vm_map_lookup_locked_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_restart, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_size, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_max, "");

extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

/* Shared-region object counts (current and peak). */
extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/* debug4k: knobs for debugging 4K-page-size maps on 16K kernels. */
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */

/* Address-space / data-size limit enforcement counters. */
extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

/* Resilient-media fault handling counters. */
extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
/* Error-injection knobs for exercising the resilient-media paths. */
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

/*
 * A sysctl which causes all existing shared regions to become stale. They
 * will no longer be used by anything new and will be torn down as soon as
 * the last existing user exits. A write of non-zero value causes that to happen.
 * This should only be used by launchd, so we check that this is initproc.
 */
3567 static int
shared_region_pivot(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3568 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3569 {
3570 unsigned int value = 0;
3571 int changed = 0;
3572 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3573 if (error || !changed) {
3574 return error;
3575 }
3576 if (current_proc() != initproc) {
3577 return EPERM;
3578 }
3579
3580 vm_shared_region_pivot();
3581
3582 return 0;
3583 }
3584
SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");

/* Running total of text-page corruptions detected (and healed) by the VM. */
SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");

/*
 * sysctl to return the number of pages on retired_pages_object
 */
3595 static int
3596 retired_pages_count SYSCTL_HANDLER_ARGS
3597 {
3598 #pragma unused(arg1, arg2, oidp)
3599 extern uint32_t vm_retired_pages_count(void);
3600 uint32_t value = vm_retired_pages_count();
3601
3602 return SYSCTL_OUT(req, &value, sizeof(value));
3603 }
SYSCTL_PROC(_vm, OID_AUTO, retired_pages_count, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &retired_pages_count, "I", "");

#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self healing.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3613 static int
corrupt_text_addr(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3614 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3615 {
3616 uint64_t value = 0;
3617 int error = sysctl_handle_quad(oidp, &value, 0, req);
3618 if (error || !req->newptr) {
3619 return error;
3620 }
3621
3622 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3623 return 0;
3624 } else {
3625 return EINVAL;
3626 }
3627 }
3628
/* Write-only, masked: the corruption hook is deliberately not listed. */
SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
#endif /* DEBUG || DEVELOPMENT */
3633
/* Compressor segment-fill lock-contention statistics. */
extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
/* Major-compaction timing/reporting knobs and statistics. */
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
3656