1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40 #include <vm/vm_options.h>
41
42 #include <kern/ecc.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/extmod_statistics.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/sdt.h>
50 #include <mach/task.h>
51 #include <mach/task_access.h>
52 #include <mach/task_special_ports.h>
53 #include <mach/time_value.h>
54 #include <mach/vm_map.h>
55 #include <mach/vm_param.h>
56 #include <mach/vm_prot.h>
57 #include <machine/machine_routines.h>
58
59 #include <sys/file_internal.h>
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/dir.h>
63 #include <sys/namei.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/vm.h>
67 #include <sys/file.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/mount.h>
70 #include <sys/xattr.h>
71 #include <sys/trace.h>
72 #include <sys/kernel.h>
73 #include <sys/ubc_internal.h>
74 #include <sys/user.h>
75 #include <sys/syslog.h>
76 #include <sys/stat.h>
77 #include <sys/sysproto.h>
78 #include <sys/mman.h>
79 #include <sys/sysctl.h>
80 #include <sys/cprotect.h>
81 #include <sys/kpi_socket.h>
82 #include <sys/kas_info.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/random.h>
86 #include <sys/code_signing.h>
87 #if NECP
88 #include <net/necp.h>
89 #endif /* NECP */
90 #if SKYWALK
91 #include <skywalk/os_channel.h>
92 #endif /* SKYWALK */
93
94 #include <security/audit/audit.h>
95 #include <security/mac.h>
96 #include <bsm/audit_kevents.h>
97
98 #include <kern/kalloc.h>
99 #include <vm/vm_map_internal.h>
100 #include <vm/vm_kern_xnu.h>
101 #include <vm/vm_pageout_xnu.h>
102
103 #include <mach/shared_region.h>
104 #include <vm/vm_shared_region_internal.h>
105
106 #include <vm/vm_dyld_pager_internal.h>
107 #include <vm/vm_protos_internal.h>
108 #if DEVELOPMENT || DEBUG
109 #include <vm/vm_compressor_info.h> /* for c_segment_info */
110 #include <vm/vm_compressor_xnu.h> /* for vm_compressor_serialize_segment_debug_info() */
111 #endif
112 #include <vm/vm_reclaim_xnu.h>
113
114 #include <sys/kern_memorystatus.h>
115 #include <sys/kern_memorystatus_freeze.h>
116 #include <sys/proc_internal.h>
117
118 #include <mach-o/fixup-chains.h>
119
120 #if CONFIG_MACF
121 #include <security/mac_framework.h>
122 #endif
123
124 #include <kern/bits.h>
125
126 #if CONFIG_CSR
127 #include <sys/csr.h>
128 #endif /* CONFIG_CSR */
129 #include <sys/trust_caches.h>
130 #include <libkern/amfi/amfi.h>
131 #include <IOKit/IOBSD.h>
132
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136
137 #if DEVELOPMENT || DEBUG
138
139 extern int vm_object_cache_evict_all(void);
140 static int
141 sysctl_vm_object_cache_evict SYSCTL_HANDLER_ARGS
142 {
143 #pragma unused(arg1, arg2, req)
144 (void) vm_object_cache_evict_all();
145 return 0;
146 }
147
148 SYSCTL_PROC(_vm, OID_AUTO, object_cache_evict, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
149 0, 0, &sysctl_vm_object_cache_evict, "I", "");
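/*
 * Example (DEVELOPMENT/DEBUG kernels only): writing any value evicts the
 * entire VM object cache, e.g. `sysctl -w vm.object_cache_evict=1`.
 */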
150
151 static int
152 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
153 {
154 #pragma unused(arg1, arg2)
155 vm_offset_t kaddr;
156 kern_return_t kr;
157 int error = 0;
158 int size = 0;
159
160 error = sysctl_handle_int(oidp, &size, 0, req);
161 if (error || !req->newptr) {
162 return error;
163 }
164
165 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
166 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
167
168 if (kr == KERN_SUCCESS) {
169 kmem_free(kernel_map, kaddr, size);
170 }
171
172 return error;
173 }
174
175 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
176 0, 0, &sysctl_kmem_alloc_contig, "I", "");
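/*
 * Example smoke test (DEVELOPMENT/DEBUG kernels only): writing a byte
 * count attempts a contiguous kernel allocation of that size, freeing it
 * on success, e.g. `sysctl -w vm.kmem_alloc_contig=65536`.
 */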
177
178 extern int vm_region_footprint;
179 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
180
181 static int
182 sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
183 {
184 #pragma unused(arg1, arg2, oidp)
185 kmem_gobj_stats stats = kmem_get_gobj_stats();
186
187 return SYSCTL_OUT(req, &stats, sizeof(stats));
188 }
189
190 SYSCTL_PROC(_vm, OID_AUTO, kmem_gobj_stats,
191 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
192 0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
193
194 #endif /* DEVELOPMENT || DEBUG */
195
196 static int
197 sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
198 {
199 #pragma unused(arg1, arg2, oidp)
200 int error = 0;
201 int value;
202
203 value = task_self_region_footprint();
204 error = SYSCTL_OUT(req, &value, sizeof(int));
205 if (error) {
206 return error;
207 }
208
209 if (!req->newptr) {
210 return 0;
211 }
212
213 error = SYSCTL_IN(req, &value, sizeof(int));
214 if (error) {
215 return error;
216 }
217 task_self_region_footprint_set(value);
218 return 0;
219 }
220 SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
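/*
 * Example (hypothetical userspace test code): enable footprint-style
 * reporting for the calling task before querying its VM regions.
 *
 *   int on = 1;
 *   sysctlbyname("vm.self_region_footprint", NULL, NULL, &on, sizeof(on));
 */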
221
222 static int
223 sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
224 {
225 #pragma unused(arg1, arg2, oidp)
226 int error = 0;
227 int value;
228
229 value = (1 << thread_self_region_page_shift());
230 error = SYSCTL_OUT(req, &value, sizeof(int));
231 if (error) {
232 return error;
233 }
234
235 if (!req->newptr) {
236 return 0;
237 }
238
239 error = SYSCTL_IN(req, &value, sizeof(int));
240 if (error) {
241 return error;
242 }
243
244 if (value != 0 && value != 4096 && value != 16384) {
245 return EINVAL;
246 }
247
248 #if !__ARM_MIXED_PAGE_SIZE__
249 if (value != vm_map_page_size(current_map())) {
250 return EINVAL;
251 }
252 #endif /* !__ARM_MIXED_PAGE_SIZE__ */
253
254 thread_self_region_page_shift_set(bit_first(value));
255 return 0;
256 }
257 SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
258
259 static int
260 sysctl_vm_self_region_info_flags SYSCTL_HANDLER_ARGS
261 {
262 #pragma unused(arg1, arg2, oidp)
263 int error = 0;
264 int value;
265 kern_return_t kr;
266
267 value = task_self_region_info_flags();
268 error = SYSCTL_OUT(req, &value, sizeof(int));
269 if (error) {
270 return error;
271 }
272
273 if (!req->newptr) {
274 return 0;
275 }
276
277 error = SYSCTL_IN(req, &value, sizeof(int));
278 if (error) {
279 return error;
280 }
281
282 kr = task_self_region_info_flags_set(value);
283 if (kr != KERN_SUCCESS) {
284 return EINVAL;
285 }
286
287 return 0;
288 }
289 SYSCTL_PROC(_vm, OID_AUTO, self_region_info_flags, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_info_flags, "I", "");
290
291
292 #if DEVELOPMENT || DEBUG
293 extern int panic_on_unsigned_execute;
294 SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
295
296 extern int vm_log_xnu_user_debug;
297 SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
298 #endif /* DEVELOPMENT || DEBUG */
299
300 extern int vm_log_map_delete_permanent_prot_none;
301 SYSCTL_INT(_vm, OID_AUTO, log_map_delete_permanent_prot_none, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_map_delete_permanent_prot_none, 0, "");
302
303 extern int cs_executable_create_upl;
304 extern int cs_executable_wire;
305 SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
306 SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
307
308 extern int apple_protect_pager_count;
309 extern int apple_protect_pager_count_mapped;
310 extern unsigned int apple_protect_pager_cache_limit;
311 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
312 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
313 SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
314
315 #if DEVELOPMENT || DEBUG
316 extern int radar_20146450;
317 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
318
319 extern int macho_printf;
320 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
321
322 extern int apple_protect_pager_data_request_debug;
323 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
324
325 extern unsigned int vm_object_copy_delayed_paging_wait_disable;
326 EXPERIMENT_FACTOR_UINT(_vm, vm_object_copy_delayed_paging_wait_disable, &vm_object_copy_delayed_paging_wait_disable, FALSE, TRUE, "");
327
328 #if __arm64__
329 /* These are meant to support the page table accounting unit test. */
330 extern unsigned int arm_hardware_page_size;
331 extern unsigned int arm_pt_desc_size;
332 extern unsigned int arm_pt_root_size;
333 extern unsigned int inuse_user_tteroot_count;
334 extern unsigned int inuse_kernel_tteroot_count;
335 extern unsigned int inuse_user_ttepages_count;
336 extern unsigned int inuse_kernel_ttepages_count;
337 extern unsigned int inuse_user_ptepages_count;
338 extern unsigned int inuse_kernel_ptepages_count;
339 SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
340 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
341 SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
342 SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
343 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
344 SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
345 SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
346 SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
347 SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
348 #if !CONFIG_SPTM
349 extern unsigned int free_page_size_tt_count;
350 extern unsigned int free_tt_count;
351 SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
352 SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
353 #endif
354 #if DEVELOPMENT || DEBUG
355 extern unsigned long pmap_asid_flushes;
356 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
357 extern unsigned long pmap_asid_hits;
358 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
359 extern unsigned long pmap_asid_misses;
360 SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
361 extern unsigned long pmap_speculation_restrictions;
362 SYSCTL_ULONG(_vm, OID_AUTO, pmap_speculation_restrictions, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_speculation_restrictions, "");
363 #endif
364 #endif /* __arm64__ */
365 #endif /* DEVELOPMENT || DEBUG */
366
367 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
368 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
369 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
370 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
371 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
372 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
373 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
374 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
375 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
376 #if VM_SCAN_FOR_SHADOW_CHAIN
377 static int vm_shadow_max_enabled = 0; /* Disabled by default */
378 extern int proc_shadow_max(void);
379 static int
380 vm_shadow_max SYSCTL_HANDLER_ARGS
381 {
382 #pragma unused(arg1, arg2, oidp)
383 int value = 0;
384
385 if (vm_shadow_max_enabled) {
386 value = proc_shadow_max();
387 }
388
389 return SYSCTL_OUT(req, &value, sizeof(value));
390 }
391 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
392 0, 0, &vm_shadow_max, "I", "");
393
394 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
395
396 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
397
398 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
399
400 #if PAGE_SLEEP_WITH_INHERITOR
401 #if DEVELOPMENT || DEBUG
402 extern uint32_t page_worker_table_size;
403 SYSCTL_INT(_vm, OID_AUTO, page_worker_table_size, CTLFLAG_RD | CTLFLAG_LOCKED, &page_worker_table_size, 0, "");
404 SCALABLE_COUNTER_DECLARE(page_worker_hash_collisions);
405 SYSCTL_SCALABLE_COUNTER(_vm, page_worker_hash_collisions, page_worker_hash_collisions, "");
406 SCALABLE_COUNTER_DECLARE(page_worker_inheritor_sleeps);
407 SYSCTL_SCALABLE_COUNTER(_vm, page_worker_inheritor_sleeps, page_worker_inheritor_sleeps, "");
408 #endif /* DEVELOPMENT || DEBUG */
409 #endif /* PAGE_SLEEP_WITH_INHERITOR */
410
411 /*
412  * Sysctls related to data/stack execution. See osfmk/vm/vm_map.c
413 */
414
415 #if DEVELOPMENT || DEBUG
416 extern int allow_stack_exec, allow_data_exec;
417
418 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
419 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
420
421 #endif /* DEVELOPMENT || DEBUG */
422
423 static const char *prot_values[] = {
424 "none",
425 "read-only",
426 "write-only",
427 "read-write",
428 "execute-only",
429 "read-execute",
430 "write-execute",
431 "read-write-execute"
432 };
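/*
 * Indexed by (prot & VM_PROT_ALL): VM_PROT_READ is 0x1, VM_PROT_WRITE is
 * 0x2 and VM_PROT_EXECUTE is 0x4, so e.g. a read/write protection (0x3)
 * prints as "read-write".
 */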
433
434 void
435 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
436 {
437 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
438 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
439 }
440
441 /*
442 * shared_region_unnest_logging: level of logging of unnesting events
443 * 0 - no logging
444 * 1 - throttled logging of unexpected unnesting events (default)
445 * 2 - unthrottled logging of unexpected unnesting events
446 * 3+ - unthrottled logging of all unnesting events
447 */
448 int shared_region_unnest_logging = 1;
449
450 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
451 &shared_region_unnest_logging, 0, "");
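/*
 * Example: `sysctl -w vm.shared_region_unnest_logging=3` logs every
 * unnesting event without throttling.
 */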
452
453 int vm_shared_region_unnest_log_interval = 10;
454 int shared_region_unnest_log_count_threshold = 5;
455
456
457 #if XNU_TARGET_OS_OSX
458
459 #if defined (__x86_64__)
460 static int scdir_enforce = 1;
461 #else /* defined (__x86_64__) */
462 static int scdir_enforce = 0; /* AOT caches live elsewhere */
463 #endif /* defined (__x86_64__) */
464
465 static char *scdir_path[] = {
466 "/System/Library/dyld/",
467 "/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
468 "/System/Cryptexes/OS/System/Library/dyld",
469 NULL
470 };
471
472 #else /* XNU_TARGET_OS_OSX */
473
474 static int scdir_enforce = 0;
475 static char *scdir_path[] = {
476 "/System/Library/Caches/com.apple.dyld/",
477 "/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
478 "/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
479 NULL
480 };
481
482 #endif /* XNU_TARGET_OS_OSX */
483
484 static char *driverkit_scdir_path[] = {
485 "/System/DriverKit/System/Library/dyld/",
486 #if XNU_TARGET_OS_OSX
487 "/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
488 #else
489 "/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
490 #endif /* XNU_TARGET_OS_OSX */
491 "/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
492 NULL
493 };
494
495 #ifndef SECURE_KERNEL
496 static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
497 {
498 #if CONFIG_CSR
499 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
500 printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
501 return EPERM;
502 }
503 #endif /* CONFIG_CSR */
504 return sysctl_handle_int(oidp, arg1, arg2, req);
505 }
506
507 SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
508 #endif
509
510 /* These log rate throttling state variables aren't thread safe, but
511 * are sufficient unto the task.
512 */
513 static int64_t last_unnest_log_time = 0;
514 static int shared_region_unnest_log_count = 0;
515
516 void
517 log_unnest_badness(
518 vm_map_t m,
519 vm_map_offset_t s,
520 vm_map_offset_t e,
521 boolean_t is_nested_map,
522 vm_map_offset_t lowest_unnestable_addr)
523 {
524 struct timeval tv;
525
526 if (shared_region_unnest_logging == 0) {
527 return;
528 }
529
530 if (shared_region_unnest_logging <= 2 &&
531 is_nested_map &&
532 s >= lowest_unnestable_addr) {
533 /*
534 * Unnesting of writable map entries is fine.
535 */
536 return;
537 }
538
539 if (shared_region_unnest_logging <= 1) {
540 microtime(&tv);
541 if ((tv.tv_sec - last_unnest_log_time) <
542 vm_shared_region_unnest_log_interval) {
543 if (shared_region_unnest_log_count++ >
544 shared_region_unnest_log_count_threshold) {
545 return;
546 }
547 } else {
548 last_unnest_log_time = tv.tv_sec;
549 shared_region_unnest_log_count = 0;
550 }
551 }
552
553 DTRACE_VM4(log_unnest_badness,
554 vm_map_t, m,
555 vm_map_offset_t, s,
556 vm_map_offset_t, e,
557 vm_map_offset_t, lowest_unnestable_addr);
558 printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
559 }
560
561 uint64_t
562 vm_purge_filebacked_pagers(void)
563 {
564 uint64_t pages_purged;
565
566 pages_purged = 0;
567 pages_purged += apple_protect_pager_purge_all();
568 pages_purged += shared_region_pager_purge_all();
569 pages_purged += dyld_pager_purge_all();
570 #if DEVELOPMENT || DEBUG
571 printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
572 #endif /* DEVELOPMENT || DEBUG */
573 return pages_purged;
574 }
575
576 int
577 useracc(
578 user_addr_ut addr_u,
579 user_size_ut len_u,
580 int prot)
581 {
582 vm_map_t map;
583 vm_prot_t vm_prot = VM_PROT_WRITE;
584
585 map = current_map();
586
587 if (prot == B_READ) {
588 vm_prot = VM_PROT_READ;
589 }
590
591 return vm_map_check_protection(map, addr_u,
592 vm_sanitize_compute_ut_end(addr_u, len_u), vm_prot,
593 VM_SANITIZE_CALLER_USERACC);
594 }
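/*
 * Example: useracc(uaddr, len, B_READ) returns non-zero when the current
 * map allows reading [uaddr, uaddr + len); any other prot value checks
 * for write access instead.
 */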
595
596 #if XNU_PLATFORM_MacOSX
597 static __attribute__((always_inline, warn_unused_result))
598 kern_return_t
599 vslock_sanitize(
600 vm_map_t map,
601 user_addr_ut addr_u,
602 user_size_ut len_u,
603 vm_sanitize_caller_t vm_sanitize_caller,
604 vm_map_offset_t *start,
605 vm_map_offset_t *end,
606 vm_map_size_t *size)
607 {
608 return vm_sanitize_addr_size(addr_u, len_u, vm_sanitize_caller,
609 map,
610 VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS, start, end,
611 size);
612 }
613 #endif /* XNU_PLATFORM_MacOSX */
614
615 int
616 vslock(user_addr_ut addr, user_size_ut len)
617 {
618 kern_return_t kret;
619
620 #if XNU_PLATFORM_MacOSX
621 /*
622  * Preserve previous behavior on macOS for overflows due to binary
623  * compatibility, i.e. return success for overflows without doing
624 * anything. Error compatibility returns VM_ERR_RETURN_NOW (on macOS)
625 * for overflow errors which gets converted to KERN_SUCCESS by
626 * vm_sanitize_get_kr.
627 */
628 vm_map_offset_t start, end;
629 vm_map_size_t size;
630
631 kret = vslock_sanitize(current_map(),
632 addr,
633 len,
634 VM_SANITIZE_CALLER_VSLOCK,
635 &start,
636 &end,
637 &size);
638 if (__improbable(kret != KERN_SUCCESS)) {
639 switch (vm_sanitize_get_kr(kret)) {
640 case KERN_SUCCESS:
641 return 0;
642 case KERN_INVALID_ADDRESS:
643 case KERN_NO_SPACE:
644 return ENOMEM;
645 case KERN_PROTECTION_FAILURE:
646 return EACCES;
647 default:
648 return EINVAL;
649 }
650 }
651 #endif /* XNU_PLATFORM_MacOSX */
652
653 kret = vm_map_wire_kernel(current_map(), addr,
654 vm_sanitize_compute_ut_end(addr, len),
655 vm_sanitize_wrap_prot(VM_PROT_READ | VM_PROT_WRITE),
656 VM_KERN_MEMORY_BSD,
657 FALSE);
658
659 switch (kret) {
660 case KERN_SUCCESS:
661 return 0;
662 case KERN_INVALID_ADDRESS:
663 case KERN_NO_SPACE:
664 return ENOMEM;
665 case KERN_PROTECTION_FAILURE:
666 return EACCES;
667 default:
668 return EINVAL;
669 }
670 }
671
672 int
673 vsunlock(user_addr_ut addr, user_size_ut len, __unused int dirtied)
674 {
675 #if FIXME /* [ */
676 pmap_t pmap;
677 vm_page_t pg;
678 vm_map_offset_t vaddr;
679 ppnum_t paddr;
680 #endif /* FIXME ] */
681 kern_return_t kret;
682 vm_map_t map;
683
684 map = current_map();
685
686 #if FIXME /* [ */
687 if (dirtied) {
688 pmap = get_task_pmap(current_task());
689 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
690 vaddr < vm_map_round_page(addr + len, PAGE_MASK);
691 vaddr += PAGE_SIZE) {
692 paddr = pmap_find_phys(pmap, vaddr);
693 pg = PHYS_TO_VM_PAGE(paddr);
694 vm_page_set_modified(pg);
695 }
696 }
697 #endif /* FIXME ] */
698 #ifdef lint
699 dirtied++;
700 #endif /* lint */
701
702 #if XNU_PLATFORM_MacOSX
703 /*
704  * Preserve previous behavior on macOS for overflows due to binary
705  * compatibility, i.e. return success for overflows without doing
706 * anything. Error compatibility returns VM_ERR_RETURN_NOW (on macOS)
707 * for overflow errors which gets converted to KERN_SUCCESS by
708 * vm_sanitize_get_kr.
709 */
710 vm_map_offset_t start, end;
711 vm_map_size_t size;
712
713 kret = vslock_sanitize(map,
714 addr,
715 len,
716 VM_SANITIZE_CALLER_VSUNLOCK,
717 &start,
718 &end,
719 &size);
720 if (__improbable(kret != KERN_SUCCESS)) {
721 switch (vm_sanitize_get_kr(kret)) {
722 case KERN_SUCCESS:
723 return 0;
724 case KERN_INVALID_ADDRESS:
725 case KERN_NO_SPACE:
726 return ENOMEM;
727 case KERN_PROTECTION_FAILURE:
728 return EACCES;
729 default:
730 return EINVAL;
731 }
732 }
733 #endif /* XNU_PLATFORM_MacOSX */
734
735 kret = vm_map_unwire(map, addr,
736 vm_sanitize_compute_ut_end(addr, len), false);
737 switch (kret) {
738 case KERN_SUCCESS:
739 return 0;
740 case KERN_INVALID_ADDRESS:
741 case KERN_NO_SPACE:
742 return ENOMEM;
743 case KERN_PROTECTION_FAILURE:
744 return EACCES;
745 default:
746 return EINVAL;
747 }
748 }
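/*
 * Typical vslock()/vsunlock() pairing (sketch): wire a user buffer for
 * the duration of a device transfer, then unwire it, passing dirtied=1
 * if the device wrote into the range:
 *
 *   if (vslock(uaddr, len) == 0) {
 *           ... transfer into the wired range ...
 *           vsunlock(uaddr, len, 1);
 *   }
 */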
749
750 int
751 subyte(
752 user_addr_t addr,
753 int byte)
754 {
755 char character;
756
757 character = (char)byte;
758 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
759 }
760
761 int
762 suibyte(
763 user_addr_t addr,
764 int byte)
765 {
766 char character;
767
768 character = (char)byte;
769 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
770 }
771
772 int
773 fubyte(user_addr_t addr)
774 {
775 unsigned char byte;
776
777 if (copyin(addr, (void *) &byte, sizeof(char))) {
778 return -1;
779 }
780 return byte;
781 }
782
783 int
784 fuibyte(user_addr_t addr)
785 {
786 unsigned char byte;
787
788 if (copyin(addr, (void *) &(byte), sizeof(char))) {
789 return -1;
790 }
791 return byte;
792 }
793
794 int
795 suword(
796 user_addr_t addr,
797 long word)
798 {
799 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
800 }
801
802 long
803 fuword(user_addr_t addr)
804 {
805 long word = 0;
806
807 if (copyin(addr, (void *) &word, sizeof(int))) {
808 return -1;
809 }
810 return word;
811 }
812
813 /* suiword and fuiword are the same as suword and fuword, respectively */
814
815 int
816 suiword(
817 user_addr_t addr,
818 long word)
819 {
820 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
821 }
822
823 long
824 fuiword(user_addr_t addr)
825 {
826 long word = 0;
827
828 if (copyin(addr, (void *) &word, sizeof(int))) {
829 return -1;
830 }
831 return word;
832 }
833
834 /*
835 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
836 * fetching and setting of process-sized size_t and pointer values.
837 */
838 int
839 sulong(user_addr_t addr, int64_t word)
840 {
841 if (IS_64BIT_PROCESS(current_proc())) {
842 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
843 } else {
844 return suiword(addr, (long)word);
845 }
846 }
847
848 int64_t
849 fulong(user_addr_t addr)
850 {
851 int64_t longword;
852
853 if (IS_64BIT_PROCESS(current_proc())) {
854 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
855 return -1;
856 }
857 return longword;
858 } else {
859 return (int64_t)fuiword(addr);
860 }
861 }
862
863 int
864 suulong(user_addr_t addr, uint64_t uword)
865 {
866 if (IS_64BIT_PROCESS(current_proc())) {
867 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
868 } else {
869 return suiword(addr, (uint32_t)uword);
870 }
871 }
872
873 uint64_t
874 fuulong(user_addr_t addr)
875 {
876 uint64_t ulongword;
877
878 if (IS_64BIT_PROCESS(current_proc())) {
879 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
880 return -1ULL;
881 }
882 return ulongword;
883 } else {
884 return (uint64_t)fuiword(addr);
885 }
886 }
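/*
 * Note: the fetch routines above (fubyte() through fuulong()) return -1
 * on a copyin() fault, which is indistinguishable from a legitimately
 * stored -1; callers that must detect faults reliably should use
 * copyin() directly.
 */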
887
888 int
889 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
890 {
891 return ENOTSUP;
892 }
893
894 #if defined(SECURE_KERNEL)
895 static int kern_secure_kernel = 1;
896 #else
897 static int kern_secure_kernel = 0;
898 #endif
899
900 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
901 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
902 &shared_region_trace_level, 0, "");
903 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
904 &shared_region_version, 0, "");
905 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
906 &shared_region_persistence, 0, "");
907
908 /*
909 * shared_region_check_np:
910 *
911 * This system call is intended for dyld.
912 *
913 * dyld calls this when any process starts to see if the process's shared
914 * region is already set up and ready to use.
915  * This call returns the base address of the first mapping in the
916  * process's shared region.
917 * dyld will then check what's mapped at that address.
918 *
919 * If the shared region is empty, dyld will then attempt to map the shared
920 * cache file in the shared region via the shared_region_map_np() system call.
921 *
922 * If something's already mapped in the shared region, dyld will check if it
923 * matches the shared cache it would like to use for that process.
924  * If it matches, everything's ready and the process can proceed and use the
925 * shared region.
926 * If it doesn't match, dyld will unmap the shared region and map the shared
927 * cache into the process's address space via mmap().
928 *
929 * A NULL pointer argument can be used by dyld to indicate it has unmapped
930 * the shared region. We will remove the shared_region reference from the task.
931 *
932 * ERROR VALUES
933 * EINVAL no shared region
934 * ENOMEM shared region is empty
935 * EFAULT bad address for "start_address"
936 */
937 int
938 shared_region_check_np(
939 __unused struct proc *p,
940 struct shared_region_check_np_args *uap,
941 __unused int *retvalp)
942 {
943 vm_shared_region_t shared_region;
944 mach_vm_offset_t start_address = 0;
945 int error = 0;
946 kern_return_t kr;
947 task_t task = current_task();
948
949 SHARED_REGION_TRACE_DEBUG(
950 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
951 (void *)VM_KERNEL_ADDRPERM(current_thread()),
952 proc_getpid(p), p->p_comm,
953 (uint64_t)uap->start_address));
954
955 /*
956 * Special value of start_address used to indicate that map_with_linking() should
957 * no longer be allowed in this process
958 */
959 if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
960 p->p_disallow_map_with_linking = TRUE;
961 return 0;
962 }
963
964 	/* retrieve the current task's shared region */
965 shared_region = vm_shared_region_get(task);
966 if (shared_region != NULL) {
967 /*
968 * A NULL argument is used by dyld to indicate the task
969 * has unmapped its shared region.
970 */
971 if (uap->start_address == 0) {
972 /* unmap it first */
973 vm_shared_region_remove(task, shared_region);
974 vm_shared_region_set(task, NULL);
975 } else {
976 /* retrieve address of its first mapping... */
977 kr = vm_shared_region_start_address(shared_region, &start_address, task);
978 if (kr != KERN_SUCCESS) {
979 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
980 "check_np(0x%llx) "
981 "vm_shared_region_start_address() failed\n",
982 (void *)VM_KERNEL_ADDRPERM(current_thread()),
983 proc_getpid(p), p->p_comm,
984 (uint64_t)uap->start_address));
985 error = ENOMEM;
986 } else {
987 #if __has_feature(ptrauth_calls)
988 /*
989 * Remap any section of the shared library that
990 * has authenticated pointers into private memory.
991 */
992 if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
993 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
994 "check_np(0x%llx) "
995 "vm_shared_region_auth_remap() failed\n",
996 (void *)VM_KERNEL_ADDRPERM(current_thread()),
997 proc_getpid(p), p->p_comm,
998 (uint64_t)uap->start_address));
999 error = ENOMEM;
1000 }
1001 #endif /* __has_feature(ptrauth_calls) */
1002
1003 /* ... and give it to the caller */
1004 if (error == 0) {
1005 error = copyout(&start_address,
1006 (user_addr_t) uap->start_address,
1007 sizeof(start_address));
1008 if (error != 0) {
1009 SHARED_REGION_TRACE_ERROR(
1010 ("shared_region: %p [%d(%s)] "
1011 "check_np(0x%llx) "
1012 "copyout(0x%llx) error %d\n",
1013 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1014 proc_getpid(p), p->p_comm,
1015 (uint64_t)uap->start_address, (uint64_t)start_address,
1016 error));
1017 }
1018 }
1019 }
1020 }
1021 vm_shared_region_deallocate(shared_region);
1022 } else {
1023 /* no shared region ! */
1024 error = EINVAL;
1025 }
1026
1027 SHARED_REGION_TRACE_DEBUG(
1028 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
1029 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1030 proc_getpid(p), p->p_comm,
1031 (uint64_t)uap->start_address, (uint64_t)start_address, error));
1032
1033 return error;
1034 }
1035
1036
1037 static int
1038 shared_region_copyin(
1039 struct proc *p,
1040 user_addr_t user_addr,
1041 unsigned int count,
1042 unsigned int element_size,
1043 void *kernel_data)
1044 {
1045 int error = 0;
1046 vm_size_t size = count * element_size;
1047
1048 error = copyin(user_addr, kernel_data, size);
1049 if (error) {
1050 SHARED_REGION_TRACE_ERROR(
1051 ("shared_region: %p [%d(%s)] map(): "
1052 "copyin(0x%llx, %ld) failed (error=%d)\n",
1053 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1054 proc_getpid(p), p->p_comm,
1055 (uint64_t)user_addr, (long)size, error));
1056 }
1057 return error;
1058 }
1059
1060 /*
1061 * A reasonable upper limit to prevent overflow of allocation/copyin.
1062 */
1063 #define _SR_FILE_MAPPINGS_MAX_FILES 256
1064
1065 /* forward declaration */
1066 __attribute__((noinline))
1067 static void shared_region_map_and_slide_cleanup(
1068 struct proc *p,
1069 uint32_t files_count,
1070 struct _sr_file_mappings *sr_file_mappings,
1071 struct vm_shared_region *shared_region);
1072
1073 /*
1074 * Setup part of _shared_region_map_and_slide().
1075 * It had to be broken out of _shared_region_map_and_slide() to
1076 * prevent compiler inlining from blowing out the stack.
1077 */
1078 __attribute__((noinline))
1079 static int
1080 shared_region_map_and_slide_setup(
1081 struct proc *p,
1082 uint32_t files_count,
1083 struct shared_file_np *files,
1084 uint32_t mappings_count,
1085 struct shared_file_mapping_slide_np *mappings,
1086 struct _sr_file_mappings **sr_file_mappings,
1087 struct vm_shared_region **shared_region_ptr,
1088 struct vnode *rdir_vp)
1089 {
1090 int error = 0;
1091 struct _sr_file_mappings *srfmp;
1092 uint32_t mappings_next;
1093 struct vnode_attr va;
1094 off_t fs;
1095 #if CONFIG_MACF
1096 vm_prot_t maxprot = VM_PROT_ALL;
1097 #endif
1098 uint32_t i;
1099 struct vm_shared_region *shared_region = NULL;
1100 boolean_t is_driverkit = task_is_driver(current_task());
1101
1102 SHARED_REGION_TRACE_DEBUG(
1103 ("shared_region: %p [%d(%s)] -> map\n",
1104 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1105 proc_getpid(p), p->p_comm));
1106
1107 if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
1108 error = E2BIG;
1109 goto done;
1110 }
1111 if (files_count == 0) {
1112 error = EINVAL;
1113 goto done;
1114 }
1115 *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
1116 Z_WAITOK | Z_ZERO);
1117 if (*sr_file_mappings == NULL) {
1118 error = ENOMEM;
1119 goto done;
1120 }
1121 mappings_next = 0;
1122 for (i = 0; i < files_count; i++) {
1123 srfmp = &(*sr_file_mappings)[i];
1124 srfmp->fd = files[i].sf_fd;
1125 srfmp->mappings_count = files[i].sf_mappings_count;
1126 srfmp->mappings = &mappings[mappings_next];
1127 mappings_next += srfmp->mappings_count;
1128 if (mappings_next > mappings_count) {
1129 error = EINVAL;
1130 goto done;
1131 }
1132 srfmp->slide = files[i].sf_slide;
1133 }
1134
1135 /* get the process's shared region (setup in vm_map_exec()) */
1136 shared_region = vm_shared_region_trim_and_get(current_task());
1137 *shared_region_ptr = shared_region;
1138 if (shared_region == NULL) {
1139 SHARED_REGION_TRACE_ERROR(
1140 ("shared_region: %p [%d(%s)] map(): "
1141 "no shared region\n",
1142 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1143 proc_getpid(p), p->p_comm));
1144 error = EINVAL;
1145 goto done;
1146 }
1147
1148 /*
1149 	 * Check that the shared region matches the current root
1150 	 * directory of this process. If not, deny the mapping to
1151 	 * avoid tainting the shared region with something that
1152 	 * doesn't quite belong in it.
1153 */
1154 struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
1155 if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
1156 SHARED_REGION_TRACE_ERROR(
1157 ("shared_region: map(%p) root_dir mismatch\n",
1158 (void *)VM_KERNEL_ADDRPERM(current_thread())));
1159 error = EPERM;
1160 goto done;
1161 }
1162
1163
1164 for (srfmp = &(*sr_file_mappings)[0];
1165 srfmp < &(*sr_file_mappings)[files_count];
1166 srfmp++) {
1167 if (srfmp->mappings_count == 0) {
1168 /* no mappings here... */
1169 continue;
1170 }
1171
1172 /*
1173 * A file descriptor of -1 is used to indicate that the data
1174 * to be put in the shared region for this mapping comes directly
1175 		 * from the process's address space. Ensure we have proper alignment.
1176 */
1177 if (srfmp->fd == -1) {
1178 /* only allow one mapping per fd */
1179 if (srfmp->mappings_count > 1) {
1180 SHARED_REGION_TRACE_ERROR(
1181 ("shared_region: %p [%d(%s)] map data >1 mapping\n",
1182 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1183 proc_getpid(p), p->p_comm));
1184 error = EINVAL;
1185 goto done;
1186 }
1187
1188 /*
1189 * The destination address and size must be page aligned.
1190 */
1191 struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
1192 mach_vm_address_t dest_addr = mapping->sms_address;
1193 mach_vm_size_t map_size = mapping->sms_size;
1194 if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
1195 SHARED_REGION_TRACE_ERROR(
1196 ("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
1197 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1198 proc_getpid(p), p->p_comm, dest_addr));
1199 error = EINVAL;
1200 goto done;
1201 }
1202 if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
1203 SHARED_REGION_TRACE_ERROR(
1204 ("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
1205 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1206 proc_getpid(p), p->p_comm, map_size));
1207 error = EINVAL;
1208 goto done;
1209 }
1210 continue;
1211 }
1212
1213 /* get file structure from file descriptor */
1214 error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
1215 if (error) {
1216 SHARED_REGION_TRACE_ERROR(
1217 ("shared_region: %p [%d(%s)] map: "
1218 "fd=%d lookup failed (error=%d)\n",
1219 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1220 proc_getpid(p), p->p_comm, srfmp->fd, error));
1221 goto done;
1222 }
1223
1224 /* we need at least read permission on the file */
1225 if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
1226 SHARED_REGION_TRACE_ERROR(
1227 ("shared_region: %p [%d(%s)] map: "
1228 "fd=%d not readable\n",
1229 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1230 proc_getpid(p), p->p_comm, srfmp->fd));
1231 error = EPERM;
1232 goto done;
1233 }
1234
1235 /* get vnode from file structure */
1236 error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
1237 if (error) {
1238 SHARED_REGION_TRACE_ERROR(
1239 ("shared_region: %p [%d(%s)] map: "
1240 "fd=%d getwithref failed (error=%d)\n",
1241 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1242 proc_getpid(p), p->p_comm, srfmp->fd, error));
1243 goto done;
1244 }
1245 srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
1246
1247 /* make sure the vnode is a regular file */
1248 if (srfmp->vp->v_type != VREG) {
1249 SHARED_REGION_TRACE_ERROR(
1250 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1251 "not a file (type=%d)\n",
1252 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1253 proc_getpid(p), p->p_comm,
1254 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1255 srfmp->vp->v_name, srfmp->vp->v_type));
1256 error = EINVAL;
1257 goto done;
1258 }
1259
1260 #if CONFIG_MACF
1261 /* pass in 0 for the offset argument because AMFI does not need the offset
1262 * of the shared cache */
1263 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
1264 srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
1265 if (error) {
1266 goto done;
1267 }
1268 #endif /* MAC */
1269
1270 #if XNU_TARGET_OS_OSX && defined(__arm64__)
1271 /*
1272 * Check if the shared cache is in the trust cache;
1273 * if so, we can skip the root ownership check.
1274 */
1275 #if DEVELOPMENT || DEBUG
1276 /*
1277 * Skip both root ownership and trust cache check if
1278 * enforcement is disabled.
1279 */
1280 if (!cs_system_enforcement()) {
1281 goto after_root_check;
1282 }
1283 #endif /* DEVELOPMENT || DEBUG */
1284 struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
1285 if (blob == NULL) {
1286 SHARED_REGION_TRACE_ERROR(
1287 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1288 "missing CS blob\n",
1289 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1290 proc_getpid(p), p->p_comm,
1291 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1292 srfmp->vp->v_name));
1293 goto root_check;
1294 }
1295 const uint8_t *cdhash = csblob_get_cdhash(blob);
1296 if (cdhash == NULL) {
1297 SHARED_REGION_TRACE_ERROR(
1298 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1299 "missing cdhash\n",
1300 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1301 proc_getpid(p), p->p_comm,
1302 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1303 srfmp->vp->v_name));
1304 goto root_check;
1305 }
1306
1307 bool in_trust_cache = false;
1308 TrustCacheQueryToken_t qt;
1309 if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
1310 TCType_t tc_type = kTCTypeInvalid;
1311 TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
1312 in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
1313 (tc_type == kTCTypeCryptex1BootOS ||
1314 tc_type == kTCTypeStatic ||
1315 tc_type == kTCTypeEngineering));
1316 }
1317 if (!in_trust_cache) {
1318 SHARED_REGION_TRACE_ERROR(
1319 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1320 "not in trust cache\n",
1321 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1322 proc_getpid(p), p->p_comm,
1323 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1324 srfmp->vp->v_name));
1325 goto root_check;
1326 }
1327 goto after_root_check;
1328 root_check:
1329 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
1330
1331 /* The shared cache file must be owned by root */
1332 VATTR_INIT(&va);
1333 VATTR_WANTED(&va, va_uid);
1334 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
1335 if (error) {
1336 SHARED_REGION_TRACE_ERROR(
1337 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1338 "vnode_getattr(%p) failed (error=%d)\n",
1339 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1340 proc_getpid(p), p->p_comm,
1341 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1342 srfmp->vp->v_name,
1343 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1344 error));
1345 goto done;
1346 }
1347 if (va.va_uid != 0) {
1348 SHARED_REGION_TRACE_ERROR(
1349 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1350 "owned by uid=%d instead of 0\n",
1351 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1352 proc_getpid(p), p->p_comm,
1353 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1354 srfmp->vp->v_name, va.va_uid));
1355 error = EPERM;
1356 goto done;
1357 }
1358
1359 #if XNU_TARGET_OS_OSX && defined(__arm64__)
1360 after_root_check:
1361 #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
1362
1363 #if CONFIG_CSR
1364 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
1365 VATTR_INIT(&va);
1366 VATTR_WANTED(&va, va_flags);
1367 error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
1368 if (error) {
1369 SHARED_REGION_TRACE_ERROR(
1370 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1371 "vnode_getattr(%p) failed (error=%d)\n",
1372 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1373 proc_getpid(p), p->p_comm,
1374 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1375 srfmp->vp->v_name,
1376 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1377 error));
1378 goto done;
1379 }
1380
1381 if (!(va.va_flags & SF_RESTRICTED)) {
1382 /*
1383 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
1384 * the shared cache file is NOT SIP-protected, so reject the
1385 * mapping request
1386 */
1387 SHARED_REGION_TRACE_ERROR(
1388 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
1389 "vnode is not SIP-protected. \n",
1390 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1391 proc_getpid(p), p->p_comm,
1392 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1393 srfmp->vp->v_name));
1394 error = EPERM;
1395 goto done;
1396 }
1397 }
1398 #else /* CONFIG_CSR */
1399
1400 /*
1401 * Devices without SIP/ROSP need to make sure that the shared cache
1402 * is either on the root volume or in the preboot cryptex volume.
1403 */
1404 assert(rdir_vp != NULL);
1405 if (srfmp->vp->v_mount != rdir_vp->v_mount) {
1406 vnode_t preboot_vp = NULL;
1407 #if XNU_TARGET_OS_OSX
1408 #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
1409 #else
1410 #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
1411 #endif
1412 error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
1413 if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
1414 SHARED_REGION_TRACE_ERROR(
1415 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1416 "not on process' root volume nor preboot volume\n",
1417 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1418 proc_getpid(p), p->p_comm,
1419 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1420 srfmp->vp->v_name));
1421 error = EPERM;
1422 if (preboot_vp) {
1423 (void)vnode_put(preboot_vp);
1424 }
1425 goto done;
1426 } else if (preboot_vp) {
1427 (void)vnode_put(preboot_vp);
1428 }
1429 }
1430 #endif /* CONFIG_CSR */
1431
1432 if (scdir_enforce) {
1433 char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
1434 struct vnode *scdir_vp = NULL;
1435 for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
1436 *expected_scdir_path != NULL;
1437 expected_scdir_path++) {
1438 /* get vnode for expected_scdir_path */
1439 error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
1440 if (error) {
1441 SHARED_REGION_TRACE_ERROR(
1442 ("shared_region: %p [%d(%s)]: "
1443 "vnode_lookup(%s) failed (error=%d)\n",
1444 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1445 proc_getpid(p), p->p_comm,
1446 *expected_scdir_path, error));
1447 continue;
1448 }
1449
1450 /* check if parent is scdir_vp */
1451 assert(scdir_vp != NULL);
1452 if (vnode_parent(srfmp->vp) == scdir_vp) {
1453 (void)vnode_put(scdir_vp);
1454 scdir_vp = NULL;
1455 goto scdir_ok;
1456 }
1457 (void)vnode_put(scdir_vp);
1458 scdir_vp = NULL;
1459 }
1460 /* nothing matches */
1461 SHARED_REGION_TRACE_ERROR(
1462 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1463 "shared cache file not in expected directory\n",
1464 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1465 proc_getpid(p), p->p_comm,
1466 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1467 srfmp->vp->v_name));
1468 error = EPERM;
1469 goto done;
1470 }
1471 scdir_ok:
1472
1473 /* get vnode size */
1474 error = vnode_size(srfmp->vp, &fs, vfs_context_current());
1475 if (error) {
1476 SHARED_REGION_TRACE_ERROR(
1477 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1478 "vnode_size(%p) failed (error=%d)\n",
1479 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1480 proc_getpid(p), p->p_comm,
1481 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1482 srfmp->vp->v_name,
1483 (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
1484 goto done;
1485 }
1486 srfmp->file_size = fs;
1487
1488 /* get the file's memory object handle */
1489 srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
1490 if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
1491 SHARED_REGION_TRACE_ERROR(
1492 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1493 "no memory object\n",
1494 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1495 proc_getpid(p), p->p_comm,
1496 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1497 srfmp->vp->v_name));
1498 error = EINVAL;
1499 goto done;
1500 }
1501
1502 /* check that the mappings are properly covered by code signatures */
1503 if (!cs_system_enforcement()) {
1504 /* code signing is not enforced: no need to check */
1505 } else {
1506 for (i = 0; i < srfmp->mappings_count; i++) {
1507 if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
1508 /* zero-filled mapping: not backed by the file */
1509 continue;
1510 }
1511 if (ubc_cs_is_range_codesigned(srfmp->vp,
1512 srfmp->mappings[i].sms_file_offset,
1513 srfmp->mappings[i].sms_size)) {
1514 /* this mapping is fully covered by code signatures */
1515 continue;
1516 }
1517 SHARED_REGION_TRACE_ERROR(
1518 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1519 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
1520 "is not code-signed\n",
1521 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1522 proc_getpid(p), p->p_comm,
1523 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
1524 srfmp->vp->v_name,
1525 i, srfmp->mappings_count,
1526 srfmp->mappings[i].sms_address,
1527 srfmp->mappings[i].sms_size,
1528 srfmp->mappings[i].sms_file_offset,
1529 srfmp->mappings[i].sms_max_prot,
1530 srfmp->mappings[i].sms_init_prot));
1531 error = EINVAL;
1532 goto done;
1533 }
1534 }
1535 }
1536 done:
1537 if (error != 0) {
1538 shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
1539 *sr_file_mappings = NULL;
1540 *shared_region_ptr = NULL;
1541 }
1542 return error;
1543 }
1544
1545 /*
1546 * shared_region_map_np()
1547 *
1548 * This system call is intended for dyld.
1549 *
1550 * dyld uses this to map a shared cache file into a shared region.
1551 * This is usually done only the first time a shared cache is needed.
1552 * Subsequent processes will just use the populated shared region without
1553 * requiring any further setup.
1554 */
1555 static int
1556 _shared_region_map_and_slide(
1557 struct proc *p,
1558 uint32_t files_count,
1559 struct shared_file_np *files,
1560 uint32_t mappings_count,
1561 struct shared_file_mapping_slide_np *mappings)
1562 {
1563 int error = 0;
1564 kern_return_t kr = KERN_SUCCESS;
1565 struct _sr_file_mappings *sr_file_mappings = NULL;
1566 struct vnode *rdir_vp = NULL;
1567 struct vm_shared_region *shared_region = NULL;
1568
1569 /*
1570 * Get a reference to the current proc's root dir.
1571 * Need this to prevent racing with chroot.
1572 */
1573 proc_fdlock(p);
1574 rdir_vp = p->p_fd.fd_rdir;
1575 if (rdir_vp == NULL) {
1576 rdir_vp = rootvnode;
1577 }
1578 assert(rdir_vp != NULL);
1579 vnode_get(rdir_vp);
1580 proc_fdunlock(p);
1581
1582 /*
1583 * Turn files, mappings into sr_file_mappings and other setup.
1584 */
1585 error = shared_region_map_and_slide_setup(p, files_count,
1586 files, mappings_count, mappings,
1587 &sr_file_mappings, &shared_region, rdir_vp);
1588 if (error != 0) {
1589 vnode_put(rdir_vp);
1590 return error;
1591 }
1592
1593 /* map the file(s) into that shared region's submap */
1594 kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
1595 if (kr != KERN_SUCCESS) {
1596 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
1597 "vm_shared_region_map_file() failed kr=0x%x\n",
1598 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1599 proc_getpid(p), p->p_comm, kr));
1600 }
1601
1602 /* convert kern_return_t to errno */
1603 switch (kr) {
1604 case KERN_SUCCESS:
1605 error = 0;
1606 break;
1607 case KERN_INVALID_ADDRESS:
1608 error = EFAULT;
1609 break;
1610 case KERN_PROTECTION_FAILURE:
1611 error = EPERM;
1612 break;
1613 case KERN_NO_SPACE:
1614 error = ENOMEM;
1615 break;
1616 case KERN_FAILURE:
1617 case KERN_INVALID_ARGUMENT:
1618 default:
1619 error = EINVAL;
1620 break;
1621 }
1622
1623 /*
1624 * Mark that this process is now using split libraries.
1625 */
1626 if (error == 0 && (p->p_flag & P_NOSHLIB)) {
1627 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
1628 }
1629
1630 vnode_put(rdir_vp);
1631 shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
1632
1633 SHARED_REGION_TRACE_DEBUG(
1634 ("shared_region: %p [%d(%s)] <- map\n",
1635 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1636 proc_getpid(p), p->p_comm));
1637
1638 return error;
1639 }
1640
1641 /*
1642 * Clean up part of _shared_region_map_and_slide()
1643 * It had to be broken out of _shared_region_map_and_slide() to
1644 * prevent compiler inlining from blowing out the stack.
1645 */
1646 __attribute__((noinline))
1647 static void
1648 shared_region_map_and_slide_cleanup(
1649 struct proc *p,
1650 uint32_t files_count,
1651 struct _sr_file_mappings *sr_file_mappings,
1652 struct vm_shared_region *shared_region)
1653 {
1654 struct _sr_file_mappings *srfmp;
1655 struct vnode_attr va;
1656
1657 if (sr_file_mappings != NULL) {
1658 for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
1659 if (srfmp->vp != NULL) {
1660 vnode_lock_spin(srfmp->vp);
1661 srfmp->vp->v_flag |= VSHARED_DYLD;
1662 vnode_unlock(srfmp->vp);
1663
1664 /* update the vnode's access time */
1665 if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
1666 VATTR_INIT(&va);
1667 nanotime(&va.va_access_time);
1668 VATTR_SET_ACTIVE(&va, va_access_time);
1669 vnode_setattr(srfmp->vp, &va, vfs_context_current());
1670 }
1671
1672 #if NAMEDSTREAMS
1673 /*
1674 * If the shared cache is compressed, it may
1675 			 * have a namedstream vnode instantiated
1676 			 * for it. That namedstream vnode will also
1677 * have to be marked with VSHARED_DYLD.
1678 */
1679 if (vnode_hasnamedstreams(srfmp->vp)) {
1680 vnode_t svp;
1681 if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
1682 NS_OPEN, 0, vfs_context_kernel()) == 0) {
1683 vnode_lock_spin(svp);
1684 svp->v_flag |= VSHARED_DYLD;
1685 vnode_unlock(svp);
1686 vnode_put(svp);
1687 }
1688 }
1689 #endif /* NAMEDSTREAMS */
1690 /*
1691 * release the vnode...
1692 * ubc_map() still holds it for us in the non-error case
1693 */
1694 (void) vnode_put(srfmp->vp);
1695 srfmp->vp = NULL;
1696 }
1697 if (srfmp->fp != NULL) {
1698 /* release the file descriptor */
1699 fp_drop(p, srfmp->fd, srfmp->fp, 0);
1700 srfmp->fp = NULL;
1701 }
1702 }
1703 kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
1704 }
1705
1706 if (shared_region != NULL) {
1707 vm_shared_region_deallocate(shared_region);
1708 }
1709 }
1710
1711 /*
1712 * For each file mapped, we may have mappings for:
1713 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
1714 * so let's round up to 8 mappings per file.
1715 */
1716 #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
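/*
 * Worked bound: 256 files * 8 mappings yields at most 2048 mapping
 * structs; at roughly 48 bytes per struct shared_file_mapping_slide_np
 * (assumed layout), the copyin is capped at about 96KB.
 */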
1717
1718 /*
1719 * This is the new interface for setting up shared region mappings.
1720 *
1721  * Sliding for shared regions set up through this interface works differently
1722  * than in the old interface. The slide value passed in the shared_file_np
1723  * structs represents a maximum: the kernel chooses a random value based on it,
1724  * then uses that value for all shared regions.
1725 */
1726 #if defined (__x86_64__)
1727 #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
1728 #else
1729 #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
1730 #endif
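/*
 * Illustrative sketch, not part of the original file: how a slide is
 * derived from the caller-supplied maximum. It relies only on the
 * SLIDE_AMOUNT_MASK definition above; the function name and standalone
 * packaging are hypothetical, mirroring the inline logic in
 * shared_region_map_and_slide_2_np() below.
 */
#if 0   /* example only, never compiled */
static uint32_t
example_pick_slide(uint32_t max_slide, uint32_t random_val)
{
	if (max_slide == 0) {
		return 0;       /* no ASLR requested */
	}
	/* bound the random value by the max, then page-align it */
	return (random_val % max_slide) & SLIDE_AMOUNT_MASK;
}
#endif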
1731
1732 static inline __result_use_check kern_return_t
1733 shared_region_map_and_slide_2_np_sanitize(
1734 struct proc *p,
1735 user_addr_t mappings_userspace_addr,
1736 unsigned int count,
1737 shared_file_mapping_slide_np_t *mappings)
1738 {
1739 kern_return_t kr;
1740 vm_map_t map = current_map();
1741 mach_vm_address_t addr, end;
1742 mach_vm_offset_t offset, offset_end;
1743 mach_vm_size_t size, offset_size;
1744 user_addr_t slide_start, slide_end, slide_size;
1745 vm_prot_t cur;
1746 vm_prot_t max;
1747
1748 user_addr_t user_addr = mappings_userspace_addr;
1749
1750 for (size_t i = 0; i < count; i++) {
1751 shared_file_mapping_slide_np_ut mapping_u;
1752 /*
1753 * First we bring each mapping struct into our kernel stack to
1754 * avoid TOCTOU.
1755 */
1756 kr = shared_region_copyin(
1757 p,
1758 user_addr,
1759 1, // copy 1 element at a time
1760 sizeof(shared_file_mapping_slide_np_ut),
1761 &mapping_u);
1762 if (__improbable(kr != KERN_SUCCESS)) {
1763 return kr;
1764 }
1765
1766 /*
1767 * Then, we sanitize the data on the kernel stack.
1768 */
1769 kr = vm_sanitize_addr_size(
1770 mapping_u.sms_address_u,
1771 mapping_u.sms_size_u,
1772 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1773 map,
1774 (VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
1775 | VM_SANITIZE_FLAGS_CHECK_ALIGNED_START
1776 | VM_SANITIZE_FLAGS_CHECK_ALIGNED_SIZE),
1777 &addr,
1778 &end,
1779 &size);
1780 if (__improbable(kr != KERN_SUCCESS)) {
1781 return kr;
1782 }
1783
1784 kr = vm_sanitize_addr_size(
1785 mapping_u.sms_file_offset_u,
1786 mapping_u.sms_size_u,
1787 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1788 PAGE_MASK,
1789 (VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
1790 | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES),
1791 &offset,
1792 &offset_end,
1793 &offset_size);
1794 if (__improbable(kr != KERN_SUCCESS)) {
1795 return kr;
1796 }
1797 if (__improbable(0 != (offset & vm_map_page_mask(map)))) {
1798 return KERN_INVALID_ARGUMENT;
1799 }
1800
1801 /*
1802 		 * The unsafe value is unwrapped and immediately re-wrapped
1803 		 * to convert it from an address type to a size type.
1804 */
1805 mach_vm_size_ut sms_slide_size_u =
1806 vm_sanitize_wrap_size(
1807 VM_SANITIZE_UNSAFE_UNWRAP(
1808 mapping_u.sms_slide_size_u));
1809
1810 kr = vm_sanitize_addr_size(
1811 mapping_u.sms_slide_start_u,
1812 sms_slide_size_u,
1813 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1814 map,
1815 (VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
1816 | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES),
1817 &slide_start,
1818 &slide_end,
1819 &slide_size);
1820 if (__improbable(kr != KERN_SUCCESS)) {
1821 return kr;
1822 }
1823
1824 kr = vm_sanitize_cur_and_max_prots(
1825 mapping_u.sms_init_prot_u,
1826 mapping_u.sms_max_prot_u,
1827 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1828 map,
1829 VM_PROT_SFM_EXTENSIONS_MASK | VM_PROT_TPRO,
1830 &cur,
1831 &max);
1832 if (__improbable(kr != KERN_SUCCESS)) {
1833 return kr;
1834 }
1835
1836 /*
1837 * Finally, we move the data from the kernel stack to our
1838 * caller-allocated kernel heap buffer.
1839 */
1840 mappings[i].sms_address = addr;
1841 mappings[i].sms_size = size;
1842 mappings[i].sms_file_offset = offset;
1843 mappings[i].sms_slide_size = slide_size;
1844 mappings[i].sms_slide_start = slide_start;
1845 mappings[i].sms_max_prot = max;
1846 mappings[i].sms_init_prot = cur;
1847
1848 if (__improbable(os_add_overflow(
1849 user_addr,
1850 sizeof(shared_file_mapping_slide_np_ut),
1851 &user_addr))) {
1852 return KERN_INVALID_ARGUMENT;
1853 }
1854 }
1855
1856 return KERN_SUCCESS;
1857 }
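/*
 * Illustrative sketch, not part of the original file: the loop above
 * copies one userspace element at a time and advances the user address
 * with an overflow check, so a hostile count/address pair cannot wrap
 * the pointer. A minimal standalone version of that idiom (the function
 * name is hypothetical):
 */
#if 0   /* example only, never compiled */
static kern_return_t
example_walk_user_array(user_addr_t uaddr, size_t count, size_t elt_size)
{
	for (size_t i = 0; i < count; i++) {
		/* ... copyin() and sanitize one element here ... */
		if (__improbable(os_add_overflow(uaddr, elt_size, &uaddr))) {
			return KERN_INVALID_ARGUMENT;   /* user pointer would wrap */
		}
	}
	return KERN_SUCCESS;
}
#endif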
1858
1859 int
1860 shared_region_map_and_slide_2_np(
1861 struct proc *p,
1862 struct shared_region_map_and_slide_2_np_args *uap,
1863 __unused int *retvalp)
1864 {
1865 unsigned int files_count;
1866 struct shared_file_np *shared_files = NULL;
1867 unsigned int mappings_count;
1868 struct shared_file_mapping_slide_np *mappings = NULL;
1869 kern_return_t kr = KERN_SUCCESS;
1870
1871 files_count = uap->files_count;
1872 mappings_count = uap->mappings_count;
1873
1874 if (files_count == 0) {
1875 SHARED_REGION_TRACE_INFO(
1876 ("shared_region: %p [%d(%s)] map(): "
1877 "no files\n",
1878 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1879 proc_getpid(p), p->p_comm));
1880 		kr = 0; /* no files to map: we're done! */
1881 goto done;
1882 } else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
1883 shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
1884 if (shared_files == NULL) {
1885 kr = KERN_RESOURCE_SHORTAGE;
1886 goto done;
1887 }
1888 } else {
1889 SHARED_REGION_TRACE_ERROR(
1890 ("shared_region: %p [%d(%s)] map(): "
1891 "too many files (%d) max %d\n",
1892 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1893 proc_getpid(p), p->p_comm,
1894 files_count, _SR_FILE_MAPPINGS_MAX_FILES));
1895 kr = KERN_FAILURE;
1896 goto done;
1897 }
1898
1899 if (mappings_count == 0) {
1900 SHARED_REGION_TRACE_INFO(
1901 ("shared_region: %p [%d(%s)] map(): "
1902 "no mappings\n",
1903 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1904 proc_getpid(p), p->p_comm));
1905 		kr = 0; /* no mappings: we're done! */
1906 goto done;
1907 } else if (mappings_count <= SFM_MAX) {
1908 mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
1909 if (mappings == NULL) {
1910 kr = KERN_RESOURCE_SHORTAGE;
1911 goto done;
1912 }
1913 } else {
1914 SHARED_REGION_TRACE_ERROR(
1915 ("shared_region: %p [%d(%s)] map(): "
1916 "too many mappings (%d) max %d\n",
1917 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1918 proc_getpid(p), p->p_comm,
1919 mappings_count, SFM_MAX));
1920 kr = KERN_FAILURE;
1921 goto done;
1922 }
1923
1924 /*
1925 * struct shared_file_np does not have fields that are subject to
1926  * sanitization, so it is copied in from userspace as is.
1927 */
1928 kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
1929 if (kr != KERN_SUCCESS) {
1930 goto done;
1931 }
1932
1933 kr = shared_region_map_and_slide_2_np_sanitize(
1934 p,
1935 uap->mappings_u,
1936 mappings_count,
1937 mappings);
1938 if (__improbable(kr != KERN_SUCCESS)) {
1939 kr = vm_sanitize_get_kr(kr);
1940 goto done;
1941 }
1942
1943 uint32_t max_slide = shared_files[0].sf_slide;
1944 uint32_t random_val;
1945 uint32_t slide_amount;
1946
1947 if (max_slide != 0) {
1948 read_random(&random_val, sizeof random_val);
1949 slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
1950 } else {
1951 slide_amount = 0;
1952 }
1953 #if DEVELOPMENT || DEBUG
1954 extern bool bootarg_disable_aslr;
1955 if (bootarg_disable_aslr) {
1956 slide_amount = 0;
1957 }
1958 #endif /* DEVELOPMENT || DEBUG */
1959
1960 /*
1961 * Fix up the mappings to reflect the desired slide.
1962 */
1963 unsigned int f;
1964 unsigned int m = 0;
1965 unsigned int i;
1966 for (f = 0; f < files_count; ++f) {
1967 shared_files[f].sf_slide = slide_amount;
1968 for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
1969 if (m >= mappings_count) {
1970 SHARED_REGION_TRACE_ERROR(
1971 ("shared_region: %p [%d(%s)] map(): "
1972 "mapping count argument was too small\n",
1973 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1974 proc_getpid(p), p->p_comm));
1975 kr = KERN_FAILURE;
1976 goto done;
1977 }
1978 if (__improbable(
1979 os_add_overflow(
1980 mappings[m].sms_address,
1981 slide_amount,
1982 &mappings[m].sms_address))) {
1983 kr = KERN_INVALID_ARGUMENT;
1984 goto done;
1985 }
1986 if (mappings[m].sms_slide_size != 0) {
1987 mach_vm_address_t discard;
1988 /* Slide and check that new start/size pairs do not overflow. */
1989 if (__improbable(
1990 os_add_overflow(
1991 mappings[m].sms_slide_start,
1992 slide_amount,
1993 &mappings[m].sms_slide_start) ||
1994 os_add_overflow(
1995 mappings[m].sms_slide_start,
1996 mappings[m].sms_slide_size,
1997 &discard))) {
1998 kr = KERN_INVALID_ARGUMENT;
1999 goto done;
2000 }
2001 }
2002 }
2003 }
2004
2005 kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
2006 done:
2007 kfree_data(shared_files, files_count * sizeof(shared_files[0]));
2008 kfree_data(mappings, mappings_count * sizeof(mappings[0]));
2009 return kr;
2010 }
2011
2012 /*
2013 * A syscall for dyld to use to map data pages that need load time relocation fixups.
2014 * The fixups are performed by a custom pager during page-in, so the pages still appear
2015 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
2016 * on demand later, all w/o using the compressor.
2017 *
2018  * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
2019 * running, they are COW'd as normal.
2020 */
2021 int
2022 map_with_linking_np(
2023 struct proc *p,
2024 struct map_with_linking_np_args *uap,
2025 __unused int *retvalp)
2026 {
2027 uint32_t region_count;
2028 uint32_t r;
2029 struct mwl_region *regions = NULL;
2030 struct mwl_region *rp;
2031 uint32_t link_info_size;
2032 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
2033 struct mwl_info_hdr *info_hdr = NULL;
2034 uint64_t binds_size;
2035 int fd;
2036 struct fileproc *fp = NULL;
2037 struct vnode *vp = NULL;
2038 size_t file_size;
2039 off_t fs;
2040 struct vnode_attr va;
2041 memory_object_control_t file_control = NULL;
2042 int error;
2043 kern_return_t kr = KERN_SUCCESS;
2044
2045 /*
2046 * Check if dyld has told us it finished with this call.
2047 */
2048 if (p->p_disallow_map_with_linking) {
2049 		printf("%s: [%d(%s)]: map_with_linking() was disabled\n",
2050 __func__, proc_getpid(p), p->p_comm);
2051 kr = KERN_FAILURE;
2052 goto done;
2053 }
2054
2055 /*
2056 * First we do some sanity checking on what dyld has passed us.
2057 */
2058 region_count = uap->region_count;
2059 link_info_size = uap->link_info_size;
2060 if (region_count == 0) {
2061 printf("%s: [%d(%s)]: region_count == 0\n",
2062 __func__, proc_getpid(p), p->p_comm);
2063 kr = KERN_FAILURE;
2064 goto done;
2065 }
2066 if (region_count > MWL_MAX_REGION_COUNT) {
2067 printf("%s: [%d(%s)]: region_count too big %d\n",
2068 __func__, proc_getpid(p), p->p_comm, region_count);
2069 kr = KERN_FAILURE;
2070 goto done;
2071 }
2072
2073 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
2074 printf("%s: [%d(%s)]: link_info_size too small\n",
2075 __func__, proc_getpid(p), p->p_comm);
2076 kr = KERN_FAILURE;
2077 goto done;
2078 }
2079 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
2080 printf("%s: [%d(%s)]: link_info_size too big %d\n",
2081 __func__, proc_getpid(p), p->p_comm, link_info_size);
2082 kr = KERN_FAILURE;
2083 goto done;
2084 }
2085
2086 /*
2087 * Allocate and copyin the regions and link info
2088 */
2089 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
2090 if (regions == NULL) {
2091 printf("%s: [%d(%s)]: failed to allocate regions\n",
2092 __func__, proc_getpid(p), p->p_comm);
2093 kr = KERN_RESOURCE_SHORTAGE;
2094 goto done;
2095 }
2096 kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
2097 if (kr != KERN_SUCCESS) {
2098 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
2099 __func__, proc_getpid(p), p->p_comm, kr);
2100 goto done;
2101 }
2102
2103 link_info = kalloc_data(link_info_size, Z_WAITOK);
2104 if (link_info == NULL) {
2105 printf("%s: [%d(%s)]: failed to allocate link_info\n",
2106 __func__, proc_getpid(p), p->p_comm);
2107 kr = KERN_RESOURCE_SHORTAGE;
2108 goto done;
2109 }
2110 kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
2111 if (kr != KERN_SUCCESS) {
2112 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
2113 __func__, proc_getpid(p), p->p_comm, kr);
2114 goto done;
2115 }
2116
2117 /*
2118 	 * Do some verification of the data structures.
2119 */
2120 info_hdr = (struct mwl_info_hdr *)link_info;
2121 if (info_hdr->mwli_version != MWL_INFO_VERS) {
2122 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
2123 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
2124 kr = KERN_FAILURE;
2125 goto done;
2126 }
2127
2128 if (info_hdr->mwli_binds_offset > link_info_size) {
2129 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
2130 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
2131 kr = KERN_FAILURE;
2132 goto done;
2133 }
2134
2135 	/* some older devices have s/w page size > h/w page size, no need to support them */
2136 if (info_hdr->mwli_page_size != PAGE_SIZE) {
2137 /* no printf, since this is expected on some devices */
2138 kr = KERN_INVALID_ARGUMENT;
2139 goto done;
2140 }
2141
2142 binds_size = (uint64_t)info_hdr->mwli_binds_count *
2143 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
2144 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
2145 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
2146 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
2147 kr = KERN_FAILURE;
2148 goto done;
2149 }
2150
2151 if (info_hdr->mwli_chains_offset > link_info_size) {
2152 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
2153 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
2154 kr = KERN_FAILURE;
2155 goto done;
2156 }
2157
2158
2159 /*
2160 	 * Ensure the chained starts structure fits in the link info and make
2161 	 * sure the segment info offsets are within bounds.
2162 */
2163 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
2164 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
2165 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
2166 kr = KERN_FAILURE;
2167 goto done;
2168 }
2169 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
2170 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
2171 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
2172 kr = KERN_FAILURE;
2173 goto done;
2174 }
2175
2176 /* Note that more verification of offsets is done in the pager itself */
2177
2178 /*
2179 * Ensure we've only been given one FD and verify valid protections.
2180 */
2181 fd = regions[0].mwlr_fd;
2182 for (r = 0; r < region_count; ++r) {
2183 if (regions[r].mwlr_fd != fd) {
2184 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
2185 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
2186 kr = KERN_FAILURE;
2187 goto done;
2188 }
2189
2190 /*
2191 * Only allow data mappings and not zero fill. Permit TPRO
2192 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
2193 */
2194 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
2195 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
2196 __func__, proc_getpid(p), p->p_comm);
2197 kr = KERN_FAILURE;
2198 goto done;
2199 }
2200 if (regions[r].mwlr_protections & VM_PROT_ZF) {
2201 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
2202 __func__, proc_getpid(p), p->p_comm, r);
2203 kr = KERN_FAILURE;
2204 goto done;
2205 }
2206 if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
2207 !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
2208 printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
2209 __func__, proc_getpid(p), p->p_comm, r);
2210 kr = KERN_FAILURE;
2211 goto done;
2212 }
2213 }
2214
2215
2216 /* get file structure from file descriptor */
2217 error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
2218 if (error) {
2219 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
2220 __func__, proc_getpid(p), p->p_comm, error);
2221 kr = KERN_FAILURE;
2222 goto done;
2223 }
2224
2225 /* We need at least read permission on the file */
2226 if (!(fp->fp_glob->fg_flag & FREAD)) {
2227 printf("%s: [%d(%s)]: not readable\n",
2228 __func__, proc_getpid(p), p->p_comm);
2229 kr = KERN_FAILURE;
2230 goto done;
2231 }
2232
2233 /* Get the vnode from file structure */
2234 vp = (struct vnode *)fp_get_data(fp);
2235 error = vnode_getwithref(vp);
2236 if (error) {
2237 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
2238 __func__, proc_getpid(p), p->p_comm, error);
2239 kr = KERN_FAILURE;
2240 vp = NULL; /* just to be sure */
2241 goto done;
2242 }
2243
2244 /* Make sure the vnode is a regular file */
2245 if (vp->v_type != VREG) {
2246 printf("%s: [%d(%s)]: vnode not VREG\n",
2247 __func__, proc_getpid(p), p->p_comm);
2248 kr = KERN_FAILURE;
2249 goto done;
2250 }
2251
2252 /* get vnode size */
2253 error = vnode_size(vp, &fs, vfs_context_current());
2254 if (error) {
2255 goto done;
2256 }
2257 file_size = fs;
2258
2259 /* get the file's memory object handle */
2260 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
2261 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
2262 printf("%s: [%d(%s)]: no memory object\n",
2263 __func__, proc_getpid(p), p->p_comm);
2264 kr = KERN_FAILURE;
2265 goto done;
2266 }
2267
2268 for (r = 0; r < region_count; ++r) {
2269 		rp = &regions[r];
2270
2271 #if CONFIG_MACF
2272 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
2273 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2274 fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
2275 if (error) {
2276 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
2277 __func__, proc_getpid(p), p->p_comm, r, error);
2278 kr = KERN_FAILURE;
2279 goto done;
2280 }
2281 #endif /* MAC */
2282
2283 /* check that the mappings are properly covered by code signatures */
2284 if (cs_system_enforcement()) {
2285 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
2286 printf("%s: [%d(%s)]: region %d, not code signed\n",
2287 __func__, proc_getpid(p), p->p_comm, r);
2288 kr = KERN_FAILURE;
2289 goto done;
2290 }
2291 }
2292 }
2293
2294 /* update the vnode's access time */
2295 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
2296 VATTR_INIT(&va);
2297 nanotime(&va.va_access_time);
2298 VATTR_SET_ACTIVE(&va, va_access_time);
2299 vnode_setattr(vp, &va, vfs_context_current());
2300 }
2301
2302 /* get the VM to do the work */
2303 kr = vm_map_with_linking(proc_task(p), regions, region_count, &link_info, link_info_size, file_control);
2304
2305 done:
2306 if (fp != NULL) {
2307 /* release the file descriptor */
2308 fp_drop(p, fd, fp, 0);
2309 }
2310 if (vp != NULL) {
2311 (void)vnode_put(vp);
2312 }
2313 if (regions != NULL) {
2314 kfree_data(regions, region_count * sizeof(regions[0]));
2315 }
2316 	/* link_info was consumed by the pager (and set to NULL) if things worked */
2317 if (link_info != NULL) {
2318 kfree_data(link_info, link_info_size);
2319 }
2320
2321 switch (kr) {
2322 case KERN_SUCCESS:
2323 return 0;
2324 case KERN_RESOURCE_SHORTAGE:
2325 return ENOMEM;
2326 default:
2327 return EINVAL;
2328 }
2329 }
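/*
 * Illustrative sketch, not part of the original file: the link-info
 * checks above validate an untrusted (offset, size) pair against a
 * buffer length without integer overflow, by bounding the offset first
 * and then comparing against the remaining room (length - offset)
 * instead of computing offset + size. The helper below is hypothetical.
 */
#if 0   /* example only, never compiled */
static bool
example_range_ok(uint32_t offset, uint64_t needed, uint32_t buf_size)
{
	if (offset > buf_size) {
		return false;                   /* offset out of range */
	}
	return needed <= buf_size - offset;     /* subtraction cannot underflow here */
}
#endif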
2330
2331 #if DEBUG || DEVELOPMENT
2332 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
2333 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
2334 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
2335 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
2336 #endif /* DEBUG || DEVELOPMENT */
2337
2338 /* sysctl overflow room */
2339
2340 SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
2341 (int *) &page_size, 0, "vm page size");
2342
2343 /* vm_page_free_target is provided as a makeshift solution for applications that want to
2344 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
2345 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
2346 extern unsigned int vm_page_free_target;
2347 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
2348 &vm_page_free_target, 0, "Pageout daemon free target");
2349
2350 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
2351 &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
2352
2353 static int
2354 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
2355 {
2356 #pragma unused(oidp, arg1, arg2)
2357 unsigned int page_free_wanted;
2358
2359 page_free_wanted = mach_vm_ctl_page_free_wanted();
2360 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
2361 }
2362 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
2363 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
2364 0, 0, vm_ctl_page_free_wanted, "I", "");
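/*
 * Illustrative sketch, not part of the original file: reading one of
 * these nodes from userspace. "vm.page_free_wanted" is registered just
 * above; the other integer nodes in this file can be read the same way.
 */
#if 0   /* userspace example, never compiled into the kernel */
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned int wanted = 0;
	size_t len = sizeof(wanted);

	if (sysctlbyname("vm.page_free_wanted", &wanted, &len, NULL, 0) == 0) {
		printf("pageout daemon wants %u more free pages\n", wanted);
	}
	return 0;
}
#endif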
2365
2366 extern unsigned int vm_page_purgeable_count;
2367 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2368 &vm_page_purgeable_count, 0, "Purgeable page count");
2369
2370 extern unsigned int vm_page_purgeable_wired_count;
2371 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2372 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
2373
2374 extern unsigned int vm_page_kern_lpage_count;
2375 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2376 &vm_page_kern_lpage_count, 0, "kernel used large pages");
2377
2378 SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
2379 SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
2380
2381 #if DEVELOPMENT || DEBUG
2382 #if __ARM_MIXED_PAGE_SIZE__
2383 static int vm_mixed_pagesize_supported = 1;
2384 #else
2385 static int vm_mixed_pagesize_supported = 0;
2386 #endif /*__ARM_MIXED_PAGE_SIZE__ */
2387 SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
2388 &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
2389
2390 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
2391 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
2392
2393 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
2394 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
2395 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
2396 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
2397 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
2398 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
2399
2400 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2401 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
2402 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2403 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
2404 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2405     &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) */
2406 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
2407 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
2408 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2409 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
2410 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
2411 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
2412 #endif /* DEVELOPMENT || DEBUG */
2413
2414 extern int madvise_free_debug;
2415 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
2416 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
2417 extern int madvise_free_debug_sometimes;
2418 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
2419 &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
2420
2421 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2422 &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
2423 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2424 &vm_page_stats_reusable.reusable_pages_success, "");
2425 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2426 &vm_page_stats_reusable.reusable_pages_failure, "");
2427 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2428 &vm_page_stats_reusable.reusable_pages_shared, "");
2429 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2430 &vm_page_stats_reusable.all_reusable_calls, "");
2431 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2432 &vm_page_stats_reusable.partial_reusable_calls, "");
2433 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2434 &vm_page_stats_reusable.reuse_pages_success, "");
2435 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2436 &vm_page_stats_reusable.reuse_pages_failure, "");
2437 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2438 &vm_page_stats_reusable.all_reuse_calls, "");
2439 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2440 &vm_page_stats_reusable.partial_reuse_calls, "");
2441 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2442 &vm_page_stats_reusable.can_reuse_success, "");
2443 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2444 &vm_page_stats_reusable.can_reuse_failure, "");
2445 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
2446 &vm_page_stats_reusable.reusable_reclaimed, "");
2447 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
2448 &vm_page_stats_reusable.reusable_nonwritable, "");
2449 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2450 &vm_page_stats_reusable.reusable_shared, "");
2451 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2452 &vm_page_stats_reusable.free_shared, "");
2453
2454
2455 extern unsigned int vm_page_free_count, vm_page_speculative_count;
2456 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
2457 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
2458
2459 extern unsigned int vm_page_cleaned_count;
2460 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
2461
2462 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
2463 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
2464 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
2465
2466 /* pageout counts */
2467 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
2468 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
2469
2470 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
2471 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
2472 SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
2473 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
2474 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
2475 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
2476
2477 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
2478 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
2479 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
2480 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
2481 extern unsigned int vm_page_realtime_count;
2482 SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
2483 extern int vm_pageout_protect_realtime;
2484 SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");
2485
2486 /* counts of pages prefaulted when entering a memory object */
2487 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
2488 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
2489 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
2490
2491 #if defined (__x86_64__)
2492 extern unsigned int vm_clump_promote_threshold;
2493 SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
2494 #if DEVELOPMENT || DEBUG
2495 extern unsigned long vm_clump_stats[];
2496 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
2497 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
2498 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
2499 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
2500 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
2501 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
2502 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
2503 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
2504 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
2505 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
2506 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
2507 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
2508 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
2509 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
2510 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
2511 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
2512 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
2513 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
2514 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
2515 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
2516 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
2517 #endif /* if DEVELOPMENT || DEBUG */
2518 #endif /* #if defined (__x86_64__) */
2519
2520 #if CONFIG_SECLUDED_MEMORY
2521
2522 SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
2523 extern unsigned int vm_page_secluded_target;
2524 extern unsigned int vm_page_secluded_count;
2525 extern unsigned int vm_page_secluded_count_free;
2526 extern unsigned int vm_page_secluded_count_inuse;
2527 extern unsigned int vm_page_secluded_count_over_target;
2528 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
2529 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
2530 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
2531 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
2532 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
2533
2534 extern struct vm_page_secluded_data vm_page_secluded;
2535 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
2536 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
2537 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
2538 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
2539 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
2540 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
2541 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
2542 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
2543 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
2544
2545 #endif /* CONFIG_SECLUDED_MEMORY */
2546
2547 #if CONFIG_DEFERRED_RECLAIM
2548 #pragma mark Deferred Reclaim
2549 SYSCTL_NODE(_vm, OID_AUTO, reclaim, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Deferred Memory Reclamation");
2550 #if DEVELOPMENT || DEBUG
2551 /*
2552 * VM reclaim testing
2553 */
2554 extern bool vm_deferred_reclamation_block_until_task_has_been_reclaimed(task_t task);
2555
2556 static int
2557 sysctl_vm_reclaim_wait_for_pid SYSCTL_HANDLER_ARGS
2558 {
2559 int error = EINVAL, pid = 0;
2560 /*
2561 	 * Only wait on write
2562 */
2563 error = sysctl_handle_int(oidp, &pid, 0, req);
2564 if (error || !req->newptr) {
2565 return error;
2566 }
2567 if (pid <= 0) {
2568 return EINVAL;
2569 }
2570 proc_t p = proc_find(pid);
2571 if (p == PROC_NULL) {
2572 return ESRCH;
2573 }
2574 task_t t = proc_task(p);
2575 if (t == TASK_NULL) {
2576 proc_rele(p);
2577 return ESRCH;
2578 }
2579 task_reference(t);
2580 proc_rele(p);
2581
2582 bool success = vm_deferred_reclamation_block_until_task_has_been_reclaimed(t);
2583 if (success) {
2584 error = 0;
2585 }
2586 task_deallocate(t);
2587
2588 return error;
2589 }
2590
2591 SYSCTL_PROC(_vm_reclaim, OID_AUTO, wait_for_pid,
2592 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
2593 &sysctl_vm_reclaim_wait_for_pid, "I",
2594 "Block until the given pid has been drained by kernel GC");
2595
2596 static int
2597 sysctl_vm_reclaim_drain_pid SYSCTL_HANDLER_ARGS
2598 {
2599 int error = EINVAL;
2600 kern_return_t kr;
2601 pid_t pid;
2602 error = sysctl_handle_int(oidp, &pid, 0, req);
2603 /* Only reclaim on write */
2604 if (error || !req->newptr) {
2605 return error;
2606 }
2607 if (pid <= 0) {
2608 return EINVAL;
2609 }
2610 proc_t p = proc_find(pid);
2611 if (p == PROC_NULL) {
2612 return ESRCH;
2613 }
2614 task_t t = proc_task(p);
2615 if (t == TASK_NULL) {
2616 proc_rele(p);
2617 return ESRCH;
2618 }
2619 task_reference(t);
2620 proc_rele(p);
2621 kr = vm_deferred_reclamation_task_drain(t, RECLAIM_OPTIONS_NONE);
2622 task_deallocate(t);
2623 return mach_to_bsd_errno(kr);
2624 }
2625
2626 SYSCTL_PROC(_vm_reclaim, OID_AUTO, drain_pid,
2627 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
2628 &sysctl_vm_reclaim_drain_pid, "I",
2629 "Drain the deferred reclamation buffer for a pid");
2630
2631 static int
2632 proc_filter_reclaimable(proc_t p, __unused void *arg)
2633 {
2634 task_t task = proc_task(p);
2635 return vm_deferred_reclamation_task_has_ring(task);
2636 }
2637
2638 static int
2639 proc_reclaim_drain(proc_t p, __unused void *arg)
2640 {
2641 kern_return_t kr;
2642 task_t task = proc_task(p);
2643 kr = vm_deferred_reclamation_task_drain(task, RECLAIM_OPTIONS_NONE);
2644 return mach_to_bsd_errno(kr);
2645 }
2646
2647 static int
2648 sysctl_vm_reclaim_drain_all SYSCTL_HANDLER_ARGS
2649 {
2650 int error;
2651 int val;
2652 if (!req->newptr) {
2653 return EINVAL;
2654 }
2655 error = sysctl_handle_int(oidp, &val, 0, req);
2656 if (error || val == FALSE) {
2657 return error;
2658 }
2659 proc_iterate(PROC_ALLPROCLIST, proc_reclaim_drain, NULL,
2660 proc_filter_reclaimable, NULL);
2661 return 0;
2662 }
2663
2664 SYSCTL_PROC(_vm_reclaim, OID_AUTO, drain_all,
2665 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
2666 &sysctl_vm_reclaim_drain_all, "I",
2667 "Fully reclaim from every deferred reclamation buffer on the system");
2668
2669 extern uint32_t vm_reclaim_buffer_count;
2670 extern uint64_t vm_reclaim_gc_epoch;
2671 extern uint64_t vm_reclaim_gc_reclaim_count;
2672 #if XNU_TARGET_OS_IOS
2673 extern uint64_t vm_reclaim_max_threshold;
2674 #else /* !XNU_TARGET_OS_IOS */
2675 extern bool vm_reclaim_debug;
2676 extern bool vm_reclaim_enabled;
2677 extern uint64_t vm_reclaim_sampling_period_ns;
2678 extern uint64_t vm_reclaim_sampling_period_abs;
2679 extern uint32_t vm_reclaim_autotrim_pct_normal;
2680 extern uint32_t vm_reclaim_autotrim_pct_pressure;
2681 extern uint32_t vm_reclaim_autotrim_pct_critical;
2682 extern uint32_t vm_reclaim_wma_weight_base;
2683 extern uint32_t vm_reclaim_wma_weight_cur;
2684 extern uint32_t vm_reclaim_wma_denom;
2685 extern uint64_t vm_reclaim_abandonment_threshold;
2686 #endif /* XNU_TARGET_OS_IOS */
2687
2688 SYSCTL_UINT(_vm_reclaim, OID_AUTO, reclaim_buffer_count,
2689 CTLFLAG_RD | CTLFLAG_LOCKED, (uint32_t *)&vm_reclaim_buffer_count, 0,
2690 "The number of deferred memory buffers currently alive");
2691 SYSCTL_QUAD(_vm_reclaim, OID_AUTO, reclaim_gc_epoch,
2692 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_gc_epoch,
2693 "Number of times the global GC thread has run");
2694 SYSCTL_QUAD(_vm_reclaim, OID_AUTO, reclaim_gc_reclaim_count,
2695 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_gc_reclaim_count,
2696 "Number of times the global GC thread has reclaimed from a buffer");
2697 #if XNU_TARGET_OS_IOS
2698 SYSCTL_QUAD(_vm_reclaim, OID_AUTO, max_threshold,
2699 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold,
2700 "Maximum amount of virtual memory (in B) that may be deferred without "
2701 "synchronous reclamation");
2702 #else /* !XNU_TARGET_OS_IOS */
2703 SYSCTL_COMPAT_UINT(_vm_reclaim, OID_AUTO, enabled,
2704 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_enabled, 0,
2705 "Whether deferred memory reclamation is enabled on this system");
2706 SYSCTL_COMPAT_UINT(_vm_reclaim, OID_AUTO, debug,
2707 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_debug, 0,
2708 "Whether vm.reclaim debug logs are enabled");
2709 SYSCTL_UINT(_vm_reclaim, OID_AUTO, autotrim_pct_normal,
2710 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_autotrim_pct_normal, 0,
2711 "Percentage of a task's lifetime max phys_footprint that must be reclaimable "
2712 "to engage auto-trim when the system is operating normally");
2713 SYSCTL_UINT(_vm_reclaim, OID_AUTO, autotrim_pct_pressure,
2714 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_autotrim_pct_pressure, 0,
2715 "Percentage of a task's lifetime max phys_footprint that must be reclaimable "
2716 "to engage auto-trim when the system is under memory pressure");
2717 SYSCTL_UINT(_vm_reclaim, OID_AUTO, autotrim_pct_critical,
2718 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_autotrim_pct_critical, 0,
2719 "Percentage of a task's lifetime max phys_footprint that must be reclaimable "
2720 "to engage auto-trim when the system is under critical memory pressure");
2721 SYSCTL_UINT(_vm_reclaim, OID_AUTO, wma_weight_base,
2722 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_wma_weight_base, 0,
2723 "Weight applied to historical minimum buffer size samples");
2724 SYSCTL_UINT(_vm_reclaim, OID_AUTO, wma_weight_cur,
2725 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_wma_weight_cur, 0,
2726 "Weight applied to current sampled minimum buffer size");
2727 SYSCTL_UINT(_vm_reclaim, OID_AUTO, wma_denom,
2728 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_wma_denom, 0,
2729 "Denominator for weighted moving average calculation");
2730 SYSCTL_QUAD(_vm_reclaim, OID_AUTO, abandonment_threshold,
2731 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_abandonment_threshold,
2732 "The number of sampling periods between accounting updates that may elapse "
2733 "before the buffer is considered \"abandoned\"");
2734
2735 static int
2736 sysctl_vm_reclaim_sampling_period SYSCTL_HANDLER_ARGS
2737 {
2738 uint64_t new_val_ns;
2739 uint64_t old_val_ns = vm_reclaim_sampling_period_ns;
2740 int err = sysctl_io_number(req, vm_reclaim_sampling_period_ns,
2741 sizeof(vm_reclaim_sampling_period_ns), &new_val_ns, NULL);
2742 if (err || !req->newptr) {
2743 return err;
2744 }
2745 if (new_val_ns != old_val_ns) {
2746 vm_reclaim_sampling_period_ns = new_val_ns;
2747 nanoseconds_to_absolutetime(vm_reclaim_sampling_period_ns, &vm_reclaim_sampling_period_abs);
2748 }
2749 return 0;
2750 }
2751
2752 SYSCTL_PROC(_vm_reclaim, OID_AUTO, sampling_period_ns,
2753 CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, sysctl_vm_reclaim_sampling_period, "I",
2754 "Interval (nanoseconds) at which to sample the minimum buffer size and "
2755 "consider trimming excess");
2756 #endif /* XNU_TARGET_OS_IOS */
2757 #endif /* DEVELOPMENT || DEBUG */
2758 #endif /* CONFIG_DEFERRED_RECLAIM */
2759
2760 #include <kern/thread.h>
2761 #include <sys/user.h>
2762
2763 void vm_pageout_io_throttle(void);
2764
2765 void
2766 vm_pageout_io_throttle(void)
2767 {
2768 struct uthread *uthread = current_uthread();
2769
2770 /*
2771 	 * If this thread is marked as a low priority I/O type
2772 	 * and the I/O we issued while in this cleaning operation
2773 	 * collided with normal I/O operations, we'll
2774 	 * delay in order to mitigate the impact of this
2775 	 * task on the normal operation of the system.
2776 */
2777
2778 if (uthread->uu_lowpri_window) {
2779 throttle_lowpri_io(1);
2780 }
2781 }
2782
2783 int
2784 vm_pressure_monitor(
2785 __unused struct proc *p,
2786 struct vm_pressure_monitor_args *uap,
2787 int *retval)
2788 {
2789 kern_return_t kr;
2790 uint32_t pages_reclaimed;
2791 uint32_t pages_wanted;
2792
2793 kr = mach_vm_pressure_monitor(
2794 (boolean_t) uap->wait_for_pressure,
2795 uap->nsecs_monitored,
2796 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
2797 &pages_wanted);
2798
2799 switch (kr) {
2800 case KERN_SUCCESS:
2801 break;
2802 case KERN_ABORTED:
2803 return EINTR;
2804 default:
2805 return EINVAL;
2806 }
2807
2808 if (uap->pages_reclaimed) {
2809 if (copyout((void *)&pages_reclaimed,
2810 uap->pages_reclaimed,
2811 sizeof(pages_reclaimed)) != 0) {
2812 return EFAULT;
2813 }
2814 }
2815
2816 *retval = (int) pages_wanted;
2817 return 0;
2818 }
2819
2820 int
2821 kas_info(struct proc *p,
2822 struct kas_info_args *uap,
2823 int *retval __unused)
2824 {
2825 #ifndef CONFIG_KAS_INFO
2826 (void)p;
2827 (void)uap;
2828 return ENOTSUP;
2829 #else /* CONFIG_KAS_INFO */
2830 int selector = uap->selector;
2831 user_addr_t valuep = uap->value;
2832 user_addr_t sizep = uap->size;
2833 user_size_t size, rsize;
2834 int error;
2835
2836 if (!kauth_cred_issuser(kauth_cred_get())) {
2837 return EPERM;
2838 }
2839
2840 #if CONFIG_MACF
2841 error = mac_system_check_kas_info(kauth_cred_get(), selector);
2842 if (error) {
2843 return error;
2844 }
2845 #endif
2846
2847 if (IS_64BIT_PROCESS(p)) {
2848 user64_size_t size64;
2849 error = copyin(sizep, &size64, sizeof(size64));
2850 size = (user_size_t)size64;
2851 } else {
2852 user32_size_t size32;
2853 error = copyin(sizep, &size32, sizeof(size32));
2854 size = (user_size_t)size32;
2855 }
2856 if (error) {
2857 return error;
2858 }
2859
2860 switch (selector) {
2861 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
2862 {
2863 uint64_t slide = vm_kernel_slide;
2864
2865 if (sizeof(slide) != size) {
2866 return EINVAL;
2867 }
2868
2869 error = copyout(&slide, valuep, sizeof(slide));
2870 if (error) {
2871 return error;
2872 }
2873 rsize = size;
2874 }
2875 break;
2876 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
2877 {
2878 uint32_t i;
2879 kernel_mach_header_t *mh = &_mh_execute_header;
2880 struct load_command *cmd;
2881 cmd = (struct load_command*) &mh[1];
2882 uint64_t *bases;
2883 rsize = mh->ncmds * sizeof(uint64_t);
2884
2885 /*
2886 * Return the size if no data was passed
2887 */
2888 if (valuep == 0) {
2889 break;
2890 }
2891
2892 if (rsize > size) {
2893 return EINVAL;
2894 }
2895
2896 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
2897
2898 for (i = 0; i < mh->ncmds; i++) {
2899 if (cmd->cmd == LC_SEGMENT_KERNEL) {
2900 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
2901 bases[i] = (uint64_t)sg->vmaddr;
2902 }
2903 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
2904 }
2905
2906 error = copyout(bases, valuep, rsize);
2907
2908 kfree_data(bases, rsize);
2909
2910 if (error) {
2911 return error;
2912 }
2913 }
2914 break;
2915 case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR:
2916 case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR:
2917 {
2918 #if CONFIG_SPTM
2919 const uint64_t slide =
2920 (selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide;
2921 #else
2922 const uint64_t slide = 0;
2923 #endif
2924
2925 if (sizeof(slide) != size) {
2926 return EINVAL;
2927 }
2928
2929 error = copyout(&slide, valuep, sizeof(slide));
2930 if (error) {
2931 return error;
2932 }
2933 rsize = size;
2934 }
2935 break;
2936 default:
2937 return EINVAL;
2938 }
2939
2940 if (IS_64BIT_PROCESS(p)) {
2941 user64_size_t size64 = (user64_size_t)rsize;
2942 error = copyout(&size64, sizep, sizeof(size64));
2943 } else {
2944 user32_size_t size32 = (user32_size_t)rsize;
2945 error = copyout(&size32, sizep, sizeof(size32));
2946 }
2947
2948 return error;
2949 #endif /* CONFIG_KAS_INFO */
2950 }
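/*
 * Illustrative sketch, not part of the original file: querying the
 * kernel text slide from userspace. kas_info() is declared in
 * <sys/kas_info.h> and is root-only, per the kauth_cred_issuser()
 * check above.
 */
#if 0   /* userspace example, never compiled into the kernel */
#include <sys/kas_info.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t slide = 0;
	size_t size = sizeof(slide);

	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0) {
		printf("kernel text slide: 0x%llx\n", (unsigned long long)slide);
	}
	return 0;
}
#endif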
2951
2952 #pragma clang diagnostic push
2953 #pragma clang diagnostic ignored "-Wcast-qual"
2954 #pragma clang diagnostic ignored "-Wunused-function"
2955
2956 static void
2957 asserts()
2958 {
2959 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
2960 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
2961 }
2962
2963 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
2964 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
2965 #pragma clang diagnostic pop
2966
2967 extern uint32_t vm_page_pages;
2968 SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
2969
2970 extern uint32_t vm_page_busy_absent_skipped;
2971 SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
2972
2973 extern uint32_t vm_page_upl_tainted;
2974 SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
2975
2976 extern uint32_t vm_page_iopl_tainted;
2977 SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
2978
2979 #if __arm64__ && (DEVELOPMENT || DEBUG)
2980 extern int vm_footprint_suspend_allowed;
2981 SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
2982
2983 extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
2984 static int
2985 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
2986 {
2987 #pragma unused(oidp, arg1, arg2)
2988 int error = 0;
2989 int new_value;
2990
2991 if (req->newptr == USER_ADDR_NULL) {
2992 return 0;
2993 }
2994 error = SYSCTL_IN(req, &new_value, sizeof(int));
2995 if (error) {
2996 return error;
2997 }
2998 if (!vm_footprint_suspend_allowed) {
2999 if (new_value != 0) {
3000 /* suspends are not allowed... */
3001 return 0;
3002 }
3003 /* ... but let resumes proceed */
3004 }
3005 DTRACE_VM2(footprint_suspend,
3006 vm_map_t, current_map(),
3007 int, new_value);
3008
3009 pmap_footprint_suspend(current_map(), new_value);
3010
3011 return 0;
3012 }
3013 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
3014 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3015 0, 0, &sysctl_vm_footprint_suspend, "I", "");
3016 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3017
3018 extern uint64_t vm_map_corpse_footprint_count;
3019 extern uint64_t vm_map_corpse_footprint_size_avg;
3020 extern uint64_t vm_map_corpse_footprint_size_max;
3021 extern uint64_t vm_map_corpse_footprint_full;
3022 extern uint64_t vm_map_corpse_footprint_no_buf;
3023 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
3024 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
3025 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
3026 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
3027 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
3028 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
3029 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
3030 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
3031 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
3032 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3033
3034 #if CODE_SIGNING_MONITOR
3035 extern uint64_t vm_cs_defer_to_csm;
3036 extern uint64_t vm_cs_defer_to_csm_not;
3037 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
3038 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
3039 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
3040 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
3041 #endif /* CODE_SIGNING_MONITOR */
3042
3043 extern uint64_t shared_region_pager_copied;
3044 extern uint64_t shared_region_pager_slid;
3045 extern uint64_t shared_region_pager_slid_error;
3046 extern uint64_t shared_region_pager_reclaimed;
3047 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
3048 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
3049 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
3050 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
3051 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
3052 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
3053 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
3054 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
3055 extern int shared_region_destroy_delay;
3056 SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
3057 CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3058
3059 #if MACH_ASSERT
3060 extern int pmap_ledgers_panic_leeway;
3061 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
3062 #endif /* MACH_ASSERT */
3063
3064
3065 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
3066 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
3067 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
3068 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
3069 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
3070 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
3071 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
3072 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
3073 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
3074 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
3075 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
3076 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
3077 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
3078 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
3079 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
3080 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
3081 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
3082 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
3083 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
3084 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
3085 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
3086 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
3087 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
3088 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
3089 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
3090 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
3091 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
3092 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
3093 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
3094 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
3095 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
3096 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
3097 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
3098 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
3099 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
3100 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
3101 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
3102 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
3103 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");

extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
extern uint64_t vm_copied_on_read;
SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");

extern int vm_shared_region_count;
extern int vm_shared_region_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
#if DEVELOPMENT || DEBUG
extern unsigned int shared_region_pagers_resident_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
extern unsigned int shared_region_pagers_resident_peak;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
extern int shared_region_pager_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
#if __has_feature(ptrauth_calls)
extern int shared_region_key_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
extern int vm_shared_region_reslide_count;
SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
#endif /* __has_feature(ptrauth_calls) */
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
extern int debug4k_filter;
SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
extern int debug4k_panic_on_terminate;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
extern int debug4k_panic_on_exception;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
extern int debug4k_panic_on_misaligned_sharing;
SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t vm_map_set_size_limit_count;
extern uint64_t vm_map_set_data_limit_count;
extern uint64_t vm_map_enter_RLIMIT_AS_count;
extern uint64_t vm_map_enter_RLIMIT_DATA_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");

extern uint64_t vm_fault_resilient_media_initiate;
extern uint64_t vm_fault_resilient_media_retry;
extern uint64_t vm_fault_resilient_media_proceed;
extern uint64_t vm_fault_resilient_media_release;
extern uint64_t vm_fault_resilient_media_abort1;
extern uint64_t vm_fault_resilient_media_abort2;
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
#if MACH_ASSERT
extern int vm_fault_resilient_media_inject_error1_rate;
extern int vm_fault_resilient_media_inject_error1;
extern int vm_fault_resilient_media_inject_error2_rate;
extern int vm_fault_resilient_media_inject_error2;
extern int vm_fault_resilient_media_inject_error3_rate;
extern int vm_fault_resilient_media_inject_error3;
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
#endif /* MACH_ASSERT */

extern uint64_t pmap_query_page_info_retries;
SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");

/*
 * A sysctl which causes all existing shared regions to become stale. They
 * will no longer be used by anything new and will be torn down as soon as
 * the last existing user exits. Any successful write causes that to happen.
 * This should only be used by launchd, so we check that the caller is initproc.
 */
static int
shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	unsigned int value = 0;
	int changed = 0;
	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
	if (error || !changed) {
		return error;
	}
	if (current_proc() != initproc) {
		return EPERM;
	}

	vm_shared_region_pivot();

	return 0;
}

SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");
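
/*
 * Usage sketch (userspace, illustrative only; not part of this file):
 * launchd would trigger the pivot with a plain integer write via
 * sysctlbyname(3). Any other caller gets EPERM from the handler above.
 *
 *	#include <sys/sysctl.h>
 *
 *	int one = 1;
 *	if (sysctlbyname("vm.shared_region_pivot", NULL, NULL, &one, sizeof(one)) != 0) {
 *		perror("vm.shared_region_pivot");	// EPERM unless the caller is initproc
 *	}
 */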

extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");

#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self-healing.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
static int
corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	uint64_t value = 0;
	int error = sysctl_handle_quad(oidp, &value, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
		return 0;
	} else {
		return EINVAL;
	}
}

SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
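
/*
 * Usage sketch (userspace test harness, illustrative only): corrupt a text
 * page in the caller's own address space and let the self-healing path
 * repair it. The address of any function in the caller's __TEXT segment
 * will do; `probe_target` is a hypothetical stand-in.
 *
 *	#include <sys/sysctl.h>
 *
 *	uint64_t addr = (uint64_t)(uintptr_t)&probe_target;
 *	if (sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, sizeof(addr)) != 0) {
 *		perror("vm.corrupt_text_addr");	// EINVAL if the page can't be corrupted
 *	}
 */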
#endif /* DEBUG || DEVELOPMENT */

#if CONFIG_MAP_RANGES
/*
 * vm.malloc_ranges
 *
 * space-separated list of <left:right> hexadecimal addresses.
 */
static int
vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
{
	vm_map_t map = current_map();
	struct mach_vm_range r1, r2;
	char str[20 * 4];
	int len;
	mach_vm_offset_t right_hole_max;

	if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
		return ENOENT;
	}
	if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
		return ENOENT;
	}

#if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
	right_hole_max = MACH_VM_JUMBO_ADDRESS;
#else /* !XNU_TARGET_OS_IOS || !EXTENDED_USER_VA_SUPPORT */
	right_hole_max = get_map_max(map);
#endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */

	len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
	    r1.max_address, r2.min_address,
	    r2.max_address, right_hole_max);

	return SYSCTL_OUT(req, str, len);
}

SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &vm_map_malloc_ranges, "A", "");
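
/*
 * Usage sketch (userspace, illustrative only): the handler formats the
 * ranges of the *calling* process's map, so reading the sysctl describes
 * the reader's own VA layout.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	char ranges[80];
 *	size_t len = sizeof(ranges);
 *	if (sysctlbyname("vm.malloc_ranges", ranges, &len, NULL, 0) == 0) {
 *		printf("%.*s\n", (int)len, ranges);	// "left:right left:right" hex pairs
 *	}
 */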

#if DEBUG || DEVELOPMENT
static int
vm_map_user_range_default SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	struct mach_vm_range range;

	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
	    != KERN_SUCCESS) {
		return EINVAL;
	}

	return SYSCTL_OUT(req, &range, sizeof(range));
}

static int
vm_map_user_range_heap SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	struct mach_vm_range range;

	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
	    != KERN_SUCCESS) {
		return EINVAL;
	}

	return SYSCTL_OUT(req, &range, sizeof(range));
}

static int
vm_map_user_range_large_file SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	struct mach_vm_range range;

	if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_LARGE_FILE, &range)
	    != KERN_SUCCESS) {
		return EINVAL;
	}

	return SYSCTL_OUT(req, &range, sizeof(range));
}

/*
 * A sysctl that can be used to return ranges for the current VM map.
 * Used for testing VM ranges.
 */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_large_file, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_large_file, "S,mach_vm_range", "");
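
/*
 * Usage sketch (userspace, illustrative only): each of these returns a raw
 * struct mach_vm_range { min_address; max_address; } for the caller.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	struct mach_vm_range range;
 *	size_t len = sizeof(range);
 *	if (sysctlbyname("vm.vm_map_user_range_heap", &range, &len, NULL, 0) == 0 &&
 *	    len == sizeof(range)) {
 *		printf("heap range: 0x%llx-0x%llx\n", range.min_address, range.max_address);
 *	}
 */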

#endif /* DEBUG || DEVELOPMENT */
#endif /* CONFIG_MAP_RANGES */

extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
SYSCTL_INT(_vm, OID_AUTO, map_range_overflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");

extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if FBDP_DEBUG_OBJECT_NO_PAGER
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

extern uint64_t cluster_direct_write_wired;
SYSCTL_QUAD(_vm, OID_AUTO, cluster_direct_write_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &cluster_direct_write_wired, "");

extern uint64_t vm_object_pageout_not_on_queue;
extern uint64_t vm_object_pageout_not_pageable;
extern uint64_t vm_object_pageout_pageable;
extern uint64_t vm_object_pageout_active_local;
SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_not_on_queue, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_not_on_queue, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_not_pageable, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_not_pageable, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_pageable, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_pageable, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_active_local, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_active_local, "");

#if DEVELOPMENT || DEBUG

static uint32_t
sysctl_compressor_seg_magic(vm_c_serialize_add_data_t with_data)
{
#pragma unused(with_data)
	return VM_C_SEGMENT_INFO_MAGIC;
}

/* The largest possible single segment + its slots is
 * (sizeof(c_segment_info) + C_SLOT_MAX_INDEX * sizeof(c_slot_info)) + (data of a single segment) */
#define SYSCTL_SEG_BUF_SIZE (8 * 1024 + 64 * 1024)

extern uint32_t c_segments_available;

struct sysctl_buf_header {
	uint32_t magic;
} __attribute__((packed));

/* This sysctl iterates over the populated c_segments and writes some info about each one and its slots.
 * Instead of doing everything here, it calls into vm_compressor.c for the per-segment serialization. */
static int
sysctl_compressor_segments_stream(struct sysctl_req *req, vm_c_serialize_add_data_t with_data)
{
	char* buf = kalloc_data(SYSCTL_SEG_BUF_SIZE, Z_WAITOK | Z_ZERO);
	if (!buf) {
		return ENOMEM;
	}
	size_t offset = 0;
	int error = 0;
	uint32_t segno = 0;
	/* 4 byte header to identify the version of the formatting of the data.
	 * This should be incremented if c_segment_info or c_slot_info are changed */
	((struct sysctl_buf_header*)buf)->magic = sysctl_compressor_seg_magic(with_data);
	offset += sizeof(uint32_t);

	while (segno < c_segments_available) {
		size_t left_sz = SYSCTL_SEG_BUF_SIZE - offset;
		kern_return_t kr = vm_compressor_serialize_segment_debug_info(segno, buf + offset, &left_sz, with_data);
		if (kr == KERN_NO_SPACE) {
			/* failed to add another segment, push the current buffer out and try again */
			if (offset == 0) {
				error = EINVAL; /* no space to write, yet nothing was written; shouldn't really happen */
				goto out;
			}
			/* write out chunk */
			error = SYSCTL_OUT(req, buf, offset);
			if (error) {
				goto out;
			}
			offset = 0;
			bzero(buf, SYSCTL_SEG_BUF_SIZE); /* zero any reserved bits that are not going to be filled */
			/* don't increment segno, need to try again saving the current one */
		} else if (kr != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		} else {
			offset += left_sz;
			++segno;
			assert(offset <= SYSCTL_SEG_BUF_SIZE);
		}
	}

	if (offset > 0) { /* write last chunk */
		error = SYSCTL_OUT(req, buf, offset);
	}

out:
	kfree_data(buf, SYSCTL_SEG_BUF_SIZE);
	return error;
}

static int
sysctl_compressor_segments(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	return sysctl_compressor_segments_stream(req, VM_C_SERIALIZE_DATA_NONE);
}
SYSCTL_PROC(_vm, OID_AUTO, compressor_segments, CTLTYPE_STRUCT | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_compressor_segments, "S", "");
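
/*
 * Usage sketch (userspace consumer, illustrative only): the stream is the
 * 4-byte magic followed by back-to-back serialized segments; the chunked
 * SYSCTL_OUT calls above are invisible to the reader, so the whole thing
 * can be consumed as one blob. The size probe is best-effort, since the
 * segment population can change between the two calls.
 *
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <sys/sysctl.h>
 *
 *	size_t len = 0;
 *	sysctlbyname("vm.compressor_segments", NULL, &len, NULL, 0);	// size probe
 *	char *blob = malloc(len);
 *	if (blob != NULL &&
 *	    sysctlbyname("vm.compressor_segments", blob, &len, NULL, 0) == 0) {
 *		uint32_t magic;
 *		memcpy(&magic, blob, sizeof(magic));
 *		// bail out if magic != VM_C_SEGMENT_INFO_MAGIC: format changed
 *	}
 */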

extern uint32_t vm_compressor_fragmentation_level(void);

static int
sysctl_compressor_fragmentation_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	uint32_t value = vm_compressor_fragmentation_level();
	return SYSCTL_OUT(req, &value, sizeof(value));
}

SYSCTL_PROC(_vm, OID_AUTO, compressor_fragmentation_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_compressor_fragmentation_level, "IU", "");

extern uint32_t vm_compressor_incore_fragmentation_wasted_pages(void);

static int
sysctl_compressor_incore_fragmentation_wasted_pages(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	uint32_t value = vm_compressor_incore_fragmentation_wasted_pages();
	return SYSCTL_OUT(req, &value, sizeof(value));
}

SYSCTL_PROC(_vm, OID_AUTO, compressor_incore_fragmentation_wasted_pages, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_compressor_incore_fragmentation_wasted_pages, "IU", "");
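
/*
 * Usage sketch (userspace, illustrative only): both handlers above emit a
 * single unsigned 32-bit value.
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	uint32_t level = 0;
 *	size_t len = sizeof(level);
 *	if (sysctlbyname("vm.compressor_fragmentation_level", &level, &len, NULL, 0) == 0) {
 *		printf("compressor fragmentation: %u\n", level);
 *	}
 */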

#define SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE (8 * 1024)

/* This sysctl iterates over all the entries of the vm_map of a given process and writes some info
 * about the vm_object pointed to by each entry.
 * This can be used to map where all of a process's pages live in the compressor.
 */
static int
sysctl_task_vm_objects_slotmap(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
{
	int error = 0;
	char *buf = NULL;
	proc_t p = PROC_NULL;
	task_t task = TASK_NULL;
	vm_map_t map = VM_MAP_NULL;
	__block size_t offset = 0;

	/* go from pid to proc to task to vm_map. see sysctl_procargsx() for another example of this progression */
	int *name = arg1;
	int namelen = arg2;
	if (namelen < 1) {
		return EINVAL;
	}
	int pid = name[0];
	p = proc_find(pid); /* this increments a reference to the proc */
	if (p == PROC_NULL) {
		return EINVAL;
	}
	task = proc_task(p);
	if (task == TASK_NULL) {
		proc_rele(p);
		return EINVAL;
	}
	/* convert the proc reference to a task reference: take the task
	 * reference while the proc reference still pins the task */
	task_reference(task);
	proc_rele(p); /* decrement ref of proc */
	p = PROC_NULL;
	/* task reference to map reference */
	map = get_task_map_reference(task);
	task_deallocate(task);

	if (map == VM_MAP_NULL) {
		return EINVAL; /* nothing allocated yet */
	}

	buf = kalloc_data(SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE, Z_WAITOK | Z_ZERO);
	if (!buf) {
		error = ENOMEM;
		goto out;
	}

	/* 4 byte header to identify the version of the formatting of the data.
	 * This should be incremented if the vm_map entry info structures change */
	((struct sysctl_buf_header*)buf)->magic = VM_MAP_ENTRY_INFO_MAGIC;
	offset += sizeof(uint32_t);

	kern_return_t (^write_header)(int) = ^kern_return_t (int nentries) {
		/* write the header, happens only once at the beginning so we should have enough space */
		assert(offset + sizeof(struct vm_map_info_hdr) < SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE);
		struct vm_map_info_hdr* out_hdr = (struct vm_map_info_hdr*)(buf + offset);
		out_hdr->vmi_nentries = nentries;
		offset += sizeof(struct vm_map_info_hdr);
		return KERN_SUCCESS;
	};

	kern_return_t (^write_entry)(void*) = ^kern_return_t (void* entry) {
		while (true) { /* try up to 2 times: first try writing into the current buffer, then flush and retry with a fresh one */
			size_t left_sz = SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE - offset;
			kern_return_t kr = vm_map_dump_entry_and_compressor_pager(entry, buf + offset, &left_sz);
			if (kr == KERN_NO_SPACE) {
				/* failed to write anything, flush the current buffer and try again */
				if (offset == 0) {
					return KERN_FAILURE; /* no space to write, yet nothing was written; shouldn't really happen */
				}
				/* write out chunk */
				int out_error = SYSCTL_OUT(req, buf, offset);
				if (out_error) {
					return KERN_FAILURE;
				}
				offset = 0;
				bzero(buf, SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE); /* zero any reserved bits that are not going to be filled */
				continue; /* need to retry the entry dump with the cleaned buffer */
			} else if (kr != KERN_SUCCESS) {
				return kr;
			}
			offset += left_sz;
			break;
		}
		return KERN_SUCCESS;
	};

	/* this foreach first calls the first callback with the number of entries, then calls the second for every entry.
	 * when the buffer is exhausted, it is flushed to the sysctl and restarted */
	kern_return_t kr = vm_map_entries_foreach(map, write_header, write_entry);

	if (kr != KERN_SUCCESS) {
		error = EINVAL;
		goto out;
	}

	if (offset > 0) { /* last chunk */
		error = SYSCTL_OUT(req, buf, offset);
	}

out:
	if (buf != NULL) {
		kfree_data(buf, SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE);
	}
	if (map != VM_MAP_NULL) {
		vm_map_deallocate(map);
	}
	return error;
}

SYSCTL_PROC(_vm, OID_AUTO, task_vm_objects_slotmap, CTLTYPE_NODE | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_task_vm_objects_slotmap, "S", "");
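
/*
 * Usage sketch (userspace, illustrative only): because this is a
 * CTLTYPE_NODE handler, the target pid travels as an extra MIB component
 * rather than as "new" data. The reply starts with the magic word, then a
 * vm_map_info_hdr, then the serialized entries.
 *
 *	#include <sys/sysctl.h>
 *	#include <unistd.h>
 *
 *	int mib[CTL_MAXNAME];
 *	size_t miblen = CTL_MAXNAME;
 *	if (sysctlnametomib("vm.task_vm_objects_slotmap", mib, &miblen) == 0) {
 *		mib[miblen++] = getpid();	// pid of the process to inspect
 *		static char buf[256 * 1024];	// sizing is a guess for this sketch
 *		size_t len = sizeof(buf);
 *		sysctl(mib, (u_int)miblen, buf, &len, NULL, 0);
 *	}
 */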


#endif /* DEVELOPMENT || DEBUG */