/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <vm/vm_options.h>

#include <kern/ecc.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/sdt.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <machine/machine_routines.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/xattr.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/random.h>
#include <sys/code_signing.h>
#if NECP
#include <net/necp.h>
#endif /* NECP */
#if SKYWALK
#include <skywalk/os_channel.h>
#endif /* SKYWALK */

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_kern_xnu.h>
#include <vm/vm_pageout_xnu.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region_internal.h>

#include <vm/vm_dyld_pager_internal.h>
#include <vm/vm_protos_internal.h>
#if DEVELOPMENT || DEBUG
#include <vm/vm_compressor_info.h>      /* for c_segment_info */
#include <vm/vm_compressor_xnu.h>       /* for vm_compressor_serialize_segment_debug_info() */
#endif
#include <vm/vm_reclaim_xnu.h>

#include <sys/kern_memorystatus.h>
#include <sys/kern_memorystatus_freeze.h>
#include <sys/proc_internal.h>

#include <mach-o/fixup-chains.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

#include <kern/bits.h>

#if CONFIG_CSR
#include <sys/csr.h>
#endif /* CONFIG_CSR */
#include <sys/trust_caches.h>
#include <libkern/amfi/amfi.h>
#include <IOKit/IOBSD.h>

#if VM_MAP_DEBUG_APPLE_PROTECT
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if DEVELOPMENT || DEBUG

static int
sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	vm_offset_t kaddr;
	kern_return_t kr;
	int error = 0;
	int size = 0;

	error = sysctl_handle_int(oidp, &size, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);

	if (kr == KERN_SUCCESS) {
		kmem_free(kernel_map, kaddr, size);
	}

	return error;
}

SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");
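/*
 * Illustrative userspace exercise of the sysctl above on a DEVELOPMENT or
 * DEBUG kernel; a sketch only, not part of this file's build:
 *
 *	int size = 4 * 4096;	// ask for a 4-page contiguous allocation
 *	sysctlbyname("vm.kmem_alloc_contig", NULL, NULL, &size, sizeof(size));
 *
 * The handler allocates and immediately frees the memory, so success simply
 * means contiguous memory of that size was available at the time.
 */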

extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");

static int
sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	kmem_gobj_stats stats = kmem_get_gobj_stats();

	return SYSCTL_OUT(req, &stats, sizeof(stats));
}

SYSCTL_PROC(_vm, OID_AUTO, kmem_gobj_stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");

#endif /* DEVELOPMENT || DEBUG */

static int
sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	value = task_self_region_footprint();
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}
	task_self_region_footprint_set(value);
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");

static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
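/*
 * Sketch of driving the handler above from userspace (0, 4096 and 16384
 * are the only values it accepts; on !__ARM_MIXED_PAGE_SIZE__ kernels the
 * value must also match the current map's page size):
 *
 *	int pgsz = 16384;
 *	sysctlbyname("vm.self_region_page_size", NULL, NULL, &pgsz, sizeof(pgsz));
 *
 *	size_t len = sizeof(pgsz);
 *	sysctlbyname("vm.self_region_page_size", &pgsz, &len, NULL, 0);
 */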

static int
sysctl_vm_self_region_info_flags SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;
	kern_return_t kr;

	value = task_self_region_info_flags();
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	kr = task_self_region_info_flags_set(value);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}

	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_info_flags, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_info_flags, "I", "");


#if DEVELOPMENT || DEBUG
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");

extern int vm_log_xnu_user_debug;
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
#endif /* DEVELOPMENT || DEBUG */

extern int vm_log_map_delete_permanent_prot_none;
SYSCTL_INT(_vm, OID_AUTO, log_map_delete_permanent_prot_none, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_map_delete_permanent_prot_none, 0, "");

extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");

#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
#if !CONFIG_SPTM
extern unsigned int free_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#endif
#if DEVELOPMENT || DEBUG
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
extern unsigned long pmap_speculation_restrictions;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_speculation_restrictions, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_speculation_restrictions, "");
#endif
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */

SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;   /* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled) {
		value = proc_shadow_max();
	}

	return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
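/*
 * Usage sketch: vm.vm_shadow_max reads back 0 until the gate above is
 * enabled, since proc_shadow_max() walks every task's map and is expensive:
 *
 *	int on = 1, depth = 0;
 *	size_t len = sizeof(depth);
 *	sysctlbyname("vm.vm_shadow_max_enabled", NULL, NULL, &on, sizeof(on));
 *	sysctlbyname("vm.vm_shadow_max", &depth, &len, NULL, 0);
 */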

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

#if PAGE_SLEEP_WITH_INHERITOR
#if DEVELOPMENT || DEBUG
extern uint32_t page_worker_table_size;
SYSCTL_INT(_vm, OID_AUTO, page_worker_table_size, CTLFLAG_RD | CTLFLAG_LOCKED, &page_worker_table_size, 0, "");
SCALABLE_COUNTER_DECLARE(page_worker_hash_collisions);
SYSCTL_SCALABLE_COUNTER(_vm, page_worker_hash_collisions, page_worker_hash_collisions, "");
SCALABLE_COUNTER_DECLARE(page_worker_inheritor_sleeps);
SYSCTL_SCALABLE_COUNTER(_vm, page_worker_inheritor_sleeps, page_worker_inheritor_sleeps, "");
#endif /* DEVELOPMENT || DEBUG */
#endif /* PAGE_SLEEP_WITH_INHERITOR */

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
}

/*
 * shared_region_unnest_logging: level of logging of unnesting events
 * 0	- no logging
 * 1	- throttled logging of unexpected unnesting events (default)
 * 2	- unthrottled logging of unexpected unnesting events
 * 3+	- unthrottled logging of all unnesting events
 */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;


#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};

#ifndef SECURE_KERNEL
static int
sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}

SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(
	vm_map_t m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval tv;

	if (shared_region_unnest_logging == 0) {
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	if (shared_region_unnest_logging <= 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}

uint64_t
vm_purge_filebacked_pagers(void)
{
	uint64_t pages_purged;

	pages_purged = 0;
	pages_purged += apple_protect_pager_purge_all();
	pages_purged += shared_region_pager_purge_all();
	pages_purged += dyld_pager_purge_all();
#if DEVELOPMENT || DEBUG
	printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
#endif /* DEVELOPMENT || DEBUG */
	return pages_purged;
}

int
useracc(
	user_addr_ut addr_u,
	user_size_ut len_u,
	int prot)
{
	vm_map_t map;
	vm_prot_t vm_prot = VM_PROT_WRITE;

	map = current_map();

	if (prot == B_READ) {
		vm_prot = VM_PROT_READ;
	}

	return vm_map_check_protection(map, addr_u,
	           vm_sanitize_compute_ut_end(addr_u, len_u), vm_prot,
	           VM_SANITIZE_CALLER_USERACC);
}

#if XNU_PLATFORM_MacOSX
static __attribute__((always_inline, warn_unused_result))
kern_return_t
vslock_sanitize(
	vm_map_t map,
	user_addr_ut addr_u,
	user_size_ut len_u,
	vm_sanitize_caller_t vm_sanitize_caller,
	vm_map_offset_t *start,
	vm_map_offset_t *end,
	vm_map_size_t *size)
{
	return vm_sanitize_addr_size(addr_u, len_u, vm_sanitize_caller,
	           map,
	           VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS, start, end,
	           size);
}
#endif /* XNU_PLATFORM_MacOSX */

int
vslock(user_addr_ut addr, user_size_ut len)
{
	kern_return_t kret;

#if XNU_PLATFORM_MacOSX
	/*
	 * Preserve previous behavior on macOS for overflows, for binary
	 * compatibility: return success for overflows without doing
	 * anything. For error compatibility, overflow errors return
	 * VM_ERR_RETURN_NOW (on macOS), which vm_sanitize_get_kr()
	 * converts to KERN_SUCCESS.
	 */
	vm_map_offset_t start, end;
	vm_map_size_t size;

	kret = vslock_sanitize(current_map(),
	    addr,
	    len,
	    VM_SANITIZE_CALLER_VSLOCK,
	    &start,
	    &end,
	    &size);
	if (__improbable(kret != KERN_SUCCESS)) {
		switch (vm_sanitize_get_kr(kret)) {
		case KERN_SUCCESS:
			return 0;
		case KERN_INVALID_ADDRESS:
		case KERN_NO_SPACE:
			return ENOMEM;
		case KERN_PROTECTION_FAILURE:
			return EACCES;
		default:
			return EINVAL;
		}
	}
#endif /* XNU_PLATFORM_MacOSX */

	kret = vm_map_wire_kernel(current_map(), addr,
	    vm_sanitize_compute_ut_end(addr, len),
	    vm_sanitize_wrap_prot(VM_PROT_READ | VM_PROT_WRITE),
	    VM_KERN_MEMORY_BSD,
	    FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}

int
vsunlock(user_addr_ut addr, user_size_ut len, __unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t pmap;
	vm_page_t pg;
	vm_map_offset_t vaddr;
	ppnum_t paddr;
#endif  /* FIXME ] */
	kern_return_t kret;
	vm_map_t map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef lint
	dirtied++;
#endif  /* lint */

#if XNU_PLATFORM_MacOSX
	/*
	 * Preserve previous behavior on macOS for overflows, for binary
	 * compatibility: return success for overflows without doing
	 * anything. For error compatibility, overflow errors return
	 * VM_ERR_RETURN_NOW (on macOS), which vm_sanitize_get_kr()
	 * converts to KERN_SUCCESS.
	 */
	vm_map_offset_t start, end;
	vm_map_size_t size;

	kret = vslock_sanitize(map,
	    addr,
	    len,
	    VM_SANITIZE_CALLER_VSUNLOCK,
	    &start,
	    &end,
	    &size);
	if (__improbable(kret != KERN_SUCCESS)) {
		switch (vm_sanitize_get_kr(kret)) {
		case KERN_SUCCESS:
			return 0;
		case KERN_INVALID_ADDRESS:
		case KERN_NO_SPACE:
			return ENOMEM;
		case KERN_PROTECTION_FAILURE:
			return EACCES;
		default:
			return EINVAL;
		}
	}
#endif /* XNU_PLATFORM_MacOSX */

	kret = vm_map_unwire(map, addr,
	    vm_sanitize_compute_ut_end(addr, len), false);
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}

int
fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char))) {
		return -1;
	}
	return byte;
}

int
fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char))) {
		return -1;
	}
	return byte;
}

int
suword(
	user_addr_t addr,
	long word)
{
	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}

long
fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int))) {
		return -1;
	}
	return word;
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}

long
fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int))) {
		return -1;
	}
	return word;
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
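/*
 * In-kernel usage sketch (hypothetical "uaddr"): store then fetch a
 * pointer-sized value, sized to the current process's ABI:
 *
 *	if (sulong(uaddr, 42) == 0) {
 *		int64_t v = fulong(uaddr);
 *		// note: -1 also signals a copyin fault, so a stored -1 is ambiguous
 *	}
 */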
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
	} else {
		return suiword(addr, (long)word);
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
			return -1;
		}
		return longword;
	} else {
		return (int64_t)fuiword(addr);
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
	} else {
		return suiword(addr, (uint32_t)uword);
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
			return -1ULL;
		}
		return ulongword;
	} else {
		return (uint64_t)fuiword(addr);
	}
}

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * A NULL pointer argument can be used by dyld to indicate it has unmapped
 * the shared region. We will remove the shared_region reference from the task.
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
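/*
 * Hypothetical dyld-side flow (a sketch of the protocol described above;
 * the userspace wrapper name is an assumption):
 *
 *	uint64_t base = 0;
 *	if (__shared_region_check_np(&base) == 0) {
 *		// inspect what is mapped at "base" and reuse it if it matches
 *	} else {
 *		// EINVAL/ENOMEM: populate the region via the map-and-slide call
 *	}
 */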
int
shared_region_check_np(
	__unused struct proc *p,
	struct shared_region_check_np_args *uap,
	__unused int *retvalp)
{
	vm_shared_region_t shared_region;
	mach_vm_offset_t start_address = 0;
	int error = 0;
	kern_return_t kr;
	task_t task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address, task);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			} else {
#if __has_feature(ptrauth_calls)
				/*
				 * Remap any section of the shared library that
				 * has authenticated pointers into private memory.
				 */
				if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_auth_remap() failed\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address));
					error = ENOMEM;
				}
#endif /* __has_feature(ptrauth_calls) */

				/* ... and give it to the caller */
				if (error == 0) {
					error = copyout(&start_address,
					    (user_addr_t) uap->start_address,
					    sizeof(start_address));
					if (error != 0) {
						SHARED_REGION_TRACE_ERROR(
							("shared_region: %p [%d(%s)] "
							"check_np(0x%llx) "
							"copyout(0x%llx) error %d\n",
							(void *)VM_KERNEL_ADDRPERM(current_thread()),
							proc_getpid(p), p->p_comm,
							(uint64_t)uap->start_address, (uint64_t)start_address,
							error));
					}
				}
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}


static int
shared_region_copyin(
	struct proc *p,
	user_addr_t user_addr,
	unsigned int count,
	unsigned int element_size,
	void *kernel_data)
{
	int error = 0;
	vm_size_t size = count * element_size;

	error = copyin(user_addr, kernel_data, size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"copyin(0x%llx, %ld) failed (error=%d)\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			(uint64_t)user_addr, (long)size, error));
	}
	return error;
}

/*
 * A reasonable upper limit to prevent overflow of allocation/copyin.
 */
#define _SR_FILE_MAPPINGS_MAX_FILES 256

/* forward declaration */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region);

/*
 * Setup part of _shared_region_map_and_slide().
 * It had to be broken out of _shared_region_map_and_slide() to
 * prevent compiler inlining from blowing out the stack.
 */
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
	struct proc *p,
	uint32_t files_count,
	struct shared_file_np *files,
	uint32_t mappings_count,
	struct shared_file_mapping_slide_np *mappings,
	struct _sr_file_mappings **sr_file_mappings,
	struct vm_shared_region **shared_region_ptr,
	struct vnode *rdir_vp)
{
	int error = 0;
	struct _sr_file_mappings *srfmp;
	uint32_t mappings_next;
	struct vnode_attr va;
	off_t fs;
#if CONFIG_MACF
	vm_prot_t maxprot = VM_PROT_ALL;
#endif
	uint32_t i;
	struct vm_shared_region *shared_region = NULL;
	boolean_t is_driverkit = task_is_driver(current_task());

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
		error = E2BIG;
		goto done;
	}
	if (files_count == 0) {
		error = EINVAL;
		goto done;
	}
	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
	    Z_WAITOK | Z_ZERO);
	if (*sr_file_mappings == NULL) {
		error = ENOMEM;
		goto done;
	}
	mappings_next = 0;
	for (i = 0; i < files_count; i++) {
		srfmp = &(*sr_file_mappings)[i];
		srfmp->fd = files[i].sf_fd;
		srfmp->mappings_count = files[i].sf_mappings_count;
		srfmp->mappings = &mappings[mappings_next];
		mappings_next += srfmp->mappings_count;
		if (mappings_next > mappings_count) {
			error = EINVAL;
			goto done;
		}
		srfmp->slide = files[i].sf_slide;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_trim_and_get(current_task());
	*shared_region_ptr = shared_region;
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"no shared region\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		error = EINVAL;
		goto done;
	}

	/*
	 * Check that the shared region matches the current root
	 * directory of this process. If not, deny the mapping to
	 * avoid tainting the shared region with something that
	 * doesn't quite belong in it.
	 */
	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
	if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: map(%p) root_dir mismatch\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread())));
		error = EPERM;
		goto done;
	}


	for (srfmp = &(*sr_file_mappings)[0];
	    srfmp < &(*sr_file_mappings)[files_count];
	    srfmp++) {
		if (srfmp->mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * A file descriptor of -1 is used to indicate that the data
		 * to be put in the shared region for this mapping comes directly
		 * from the process's address space. Ensure we have proper alignments.
		 */
		if (srfmp->fd == -1) {
			/* only allow one mapping per fd */
			if (srfmp->mappings_count > 1) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data >1 mapping\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				error = EINVAL;
				goto done;
			}

			/*
			 * The destination address and size must be page aligned.
			 */
			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
			mach_vm_address_t dest_addr = mapping->sms_address;
			mach_vm_size_t map_size = mapping->sms_size;
			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, dest_addr));
				error = EINVAL;
				goto done;
			}
			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, map_size));
				error = EINVAL;
				goto done;
			}
			continue;
		}

		/* get file structure from file descriptor */
		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d lookup failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}

		/* we need at least read permission on the file */
		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d not readable\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd));
			error = EPERM;
			goto done;
		}

		/* get vnode from file structure */
		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d getwithref failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}
		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);

		/* make sure the vnode is a regular file */
		if (srfmp->vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not a file (type=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, srfmp->vp->v_type));
			error = EINVAL;
			goto done;
		}

#if CONFIG_MACF
		/* pass in 0 for the offset argument because AMFI does not need the offset
		 * of the shared cache */
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
		if (error) {
			goto done;
		}
#endif /* MAC */

#if XNU_TARGET_OS_OSX && defined(__arm64__)
		/*
		 * Check if the shared cache is in the trust cache;
		 * if so, we can skip the root ownership check.
		 */
#if DEVELOPMENT || DEBUG
		/*
		 * Skip both root ownership and trust cache check if
		 * enforcement is disabled.
		 */
		if (!cs_system_enforcement()) {
			goto after_root_check;
		}
#endif /* DEVELOPMENT || DEBUG */
		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
		if (blob == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing CS blob\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		const uint8_t *cdhash = csblob_get_cdhash(blob);
		if (cdhash == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing cdhash\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}

		bool in_trust_cache = false;
		TrustCacheQueryToken_t qt;
		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
			TCType_t tc_type = kTCTypeInvalid;
			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
			    (tc_type == kTCTypeCryptex1BootOS ||
			    tc_type == kTCTypeStatic ||
			    tc_type == kTCTypeEngineering));
		}
		if (!in_trust_cache) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not in trust cache\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

		/* The shared cache file must be owned by root */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_getattr(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				error));
			goto done;
		}
		if (va.va_uid != 0) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"owned by uid=%d instead of 0\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, va.va_uid));
			error = EPERM;
			goto done;
		}

#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_flags);
			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"vnode_getattr(%p) failed (error=%d)\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					error));
				goto done;
			}

			if (!(va.va_flags & SF_RESTRICTED)) {
				/*
				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
				 * the shared cache file is NOT SIP-protected, so reject the
				 * mapping request
				 */
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'), "
					"vnode is not SIP-protected. \n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				goto done;
			}
		}
#else /* CONFIG_CSR */

		/*
		 * Devices without SIP/ROSP need to make sure that the shared cache
		 * is either on the root volume or in the preboot cryptex volume.
		 */
		assert(rdir_vp != NULL);
		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
			vnode_t preboot_vp = NULL;
#if XNU_TARGET_OS_OSX
#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
#else
#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
#endif
			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"not on process' root volume nor preboot volume\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				if (preboot_vp) {
					(void)vnode_put(preboot_vp);
				}
				goto done;
			} else if (preboot_vp) {
				(void)vnode_put(preboot_vp);
			}
		}
#endif /* CONFIG_CSR */

		if (scdir_enforce) {
			char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			struct vnode *scdir_vp = NULL;
			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			    *expected_scdir_path != NULL;
			    expected_scdir_path++) {
				/* get vnode for expected_scdir_path */
				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
				if (error) {
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)]: "
						"vnode_lookup(%s) failed (error=%d)\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						*expected_scdir_path, error));
					continue;
				}

				/* check if parent is scdir_vp */
				assert(scdir_vp != NULL);
				if (vnode_parent(srfmp->vp) == scdir_vp) {
					(void)vnode_put(scdir_vp);
					scdir_vp = NULL;
					goto scdir_ok;
				}
				(void)vnode_put(scdir_vp);
				scdir_vp = NULL;
			}
			/* nothing matches */
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"shared cache file not in expected directory\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EPERM;
			goto done;
		}
scdir_ok:

		/* get vnode size */
		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_size(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
			goto done;
		}
		srfmp->file_size = fs;

		/* get the file's memory object handle */
		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"no memory object\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EINVAL;
			goto done;
		}

		/* check that the mappings are properly covered by code signatures */
		if (!cs_system_enforcement()) {
			/* code signing is not enforced: no need to check */
		} else {
			for (i = 0; i < srfmp->mappings_count; i++) {
				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
					/* zero-filled mapping: not backed by the file */
					continue;
				}
				if (ubc_cs_is_range_codesigned(srfmp->vp,
				    srfmp->mappings[i].sms_file_offset,
				    srfmp->mappings[i].sms_size)) {
					/* this mapping is fully covered by code signatures */
					continue;
				}
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
					"is not code-signed\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					i, srfmp->mappings_count,
					srfmp->mappings[i].sms_address,
					srfmp->mappings[i].sms_size,
					srfmp->mappings[i].sms_file_offset,
					srfmp->mappings[i].sms_max_prot,
					srfmp->mappings[i].sms_init_prot));
				error = EINVAL;
				goto done;
			}
		}
	}
done:
	if (error != 0) {
		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
		*sr_file_mappings = NULL;
		*shared_region_ptr = NULL;
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
static int
_shared_region_map_and_slide(
	struct proc *p,
	uint32_t files_count,
	struct shared_file_np *files,
	uint32_t mappings_count,
	struct shared_file_mapping_slide_np *mappings)
{
	int error = 0;
	kern_return_t kr = KERN_SUCCESS;
	struct _sr_file_mappings *sr_file_mappings = NULL;
	struct vnode *rdir_vp = NULL;
	struct vm_shared_region *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}

/*
 * Clean up part of _shared_region_map_and_slide()
 * It had to be broken out of _shared_region_map_and_slide() to
 * prevent compiler inlining from blowing out the stack.
 */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated for it.
				 * That namedstream vnode will also have to be
				 * marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}
}

/*
 * For each file mapped, we may have mappings for:
 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
 * so let's round up to 8 mappings per file.
 */
#define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8)       /* max mapping structs allowed to pass in */

/*
 * This is the new interface for setting up shared region mappings.
 *
 * The slide used for shared regions set up through this interface is chosen
 * differently from the old interface: the slide value passed in
 * shared_files_np is a maximum. The kernel picks a random value within that
 * bound, then uses it for all shared regions.
 */
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
#endif
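/*
 * Illustrative sketch (assumed variable names) of deriving a slide from the
 * caller-supplied maximum; the authoritative logic lives in
 * vm_shared_region_map_file():
 *
 *	uint32_t max_slide = files[0].sf_slide;	// upper bound requested by dyld
 *	uint32_t slide = 0;
 *	if (max_slide != 0) {
 *		read_random(&slide, sizeof(slide));
 *		slide = (slide % max_slide) & SLIDE_AMOUNT_MASK;	// page-aligned
 *	}
 */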
1716
1717 static inline __result_use_check kern_return_t
shared_region_map_and_slide_2_np_sanitize(struct proc * p,user_addr_t mappings_userspace_addr,unsigned int count,shared_file_mapping_slide_np_t * mappings)1718 shared_region_map_and_slide_2_np_sanitize(
1719 struct proc *p,
1720 user_addr_t mappings_userspace_addr,
1721 unsigned int count,
1722 shared_file_mapping_slide_np_t *mappings)
1723 {
1724 kern_return_t kr;
1725 vm_map_t map = current_map();
1726 mach_vm_address_t addr, end;
1727 mach_vm_offset_t offset, offset_end;
1728 mach_vm_size_t size, offset_size;
1729 user_addr_t slide_start, slide_end, slide_size;
1730 vm_prot_t cur;
1731 vm_prot_t max;
1732
1733 user_addr_t user_addr = mappings_userspace_addr;
1734
1735 for (size_t i = 0; i < count; i++) {
1736 shared_file_mapping_slide_np_ut mapping_u;
1737 /*
1738 * First we bring each mapping struct into our kernel stack to
1739 * avoid TOCTOU.
1740 */
1741 kr = shared_region_copyin(
1742 p,
1743 user_addr,
1744 1, // copy 1 element at a time
1745 sizeof(shared_file_mapping_slide_np_ut),
1746 &mapping_u);
1747 if (__improbable(kr != KERN_SUCCESS)) {
1748 return kr;
1749 }
1750
1751 /*
1752 * Then, we sanitize the data on the kernel stack.
1753 */
1754 kr = vm_sanitize_addr_size(
1755 mapping_u.sms_address_u,
1756 mapping_u.sms_size_u,
1757 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1758 map,
1759 (VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
1760 | VM_SANITIZE_FLAGS_CHECK_ALIGNED_START
1761 | VM_SANITIZE_FLAGS_CHECK_ALIGNED_SIZE),
1762 &addr,
1763 &end,
1764 &size);
1765 if (__improbable(kr != KERN_SUCCESS)) {
1766 return kr;
1767 }
1768
1769 kr = vm_sanitize_addr_size(
1770 mapping_u.sms_file_offset_u,
1771 mapping_u.sms_size_u,
1772 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1773 PAGE_MASK,
1774 (VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
1775 | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES),
1776 &offset,
1777 &offset_end,
1778 &offset_size);
1779 if (__improbable(kr != KERN_SUCCESS)) {
1780 return kr;
1781 }
1782 if (__improbable(0 != (offset & vm_map_page_mask(map)))) {
1783 return KERN_INVALID_ARGUMENT;
1784 }
1785
1786 /*
1787 * Unsafe access is immediately followed by wrap to
1788 * convert from addr to size.
1789 */
1790 mach_vm_size_ut sms_slide_size_u =
1791 vm_sanitize_wrap_size(
1792 VM_SANITIZE_UNSAFE_UNWRAP(
1793 mapping_u.sms_slide_size_u));
1794
1795 kr = vm_sanitize_addr_size(
1796 mapping_u.sms_slide_start_u,
1797 sms_slide_size_u,
1798 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1799 map,
1800 (VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
1801 | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES),
1802 &slide_start,
1803 &slide_end,
1804 &slide_size);
1805 if (__improbable(kr != KERN_SUCCESS)) {
1806 return kr;
1807 }
1808
1809 kr = vm_sanitize_cur_and_max_prots(
1810 mapping_u.sms_init_prot_u,
1811 mapping_u.sms_max_prot_u,
1812 VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
1813 map,
1814 VM_PROT_SFM_EXTENSIONS_MASK | VM_PROT_TPRO,
1815 &cur,
1816 &max);
1817 if (__improbable(kr != KERN_SUCCESS)) {
1818 return kr;
1819 }
1820
1821 /*
1822 * Finally, we move the data from the kernel stack to our
1823 * caller-allocated kernel heap buffer.
1824 */
1825 mappings[i].sms_address = addr;
1826 mappings[i].sms_size = size;
1827 mappings[i].sms_file_offset = offset;
1828 mappings[i].sms_slide_size = slide_size;
1829 mappings[i].sms_slide_start = slide_start;
1830 mappings[i].sms_max_prot = max;
1831 mappings[i].sms_init_prot = cur;
1832
1833 if (__improbable(os_add_overflow(
1834 user_addr,
1835 sizeof(shared_file_mapping_slide_np_ut),
1836 &user_addr))) {
1837 return KERN_INVALID_ARGUMENT;
1838 }
1839 }
1840
1841 return KERN_SUCCESS;
1842 }
1843
1844 int
1845 shared_region_map_and_slide_2_np(
1846 struct proc *p,
1847 struct shared_region_map_and_slide_2_np_args *uap,
1848 __unused int *retvalp)
1849 {
1850 unsigned int files_count;
1851 struct shared_file_np *shared_files = NULL;
1852 unsigned int mappings_count;
1853 struct shared_file_mapping_slide_np *mappings = NULL;
1854 kern_return_t kr = KERN_SUCCESS;
1855
1856 files_count = uap->files_count;
1857 mappings_count = uap->mappings_count;
1858
1859 if (files_count == 0) {
1860 SHARED_REGION_TRACE_INFO(
1861 ("shared_region: %p [%d(%s)] map(): "
1862 "no files\n",
1863 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1864 proc_getpid(p), p->p_comm));
1865 kr = 0; /* no files to map: we're done! */
1866 goto done;
1867 } else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
1868 shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
1869 if (shared_files == NULL) {
1870 kr = KERN_RESOURCE_SHORTAGE;
1871 goto done;
1872 }
1873 } else {
1874 SHARED_REGION_TRACE_ERROR(
1875 ("shared_region: %p [%d(%s)] map(): "
1876 "too many files (%d) max %d\n",
1877 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1878 proc_getpid(p), p->p_comm,
1879 files_count, _SR_FILE_MAPPINGS_MAX_FILES));
1880 kr = KERN_FAILURE;
1881 goto done;
1882 }
1883
1884 if (mappings_count == 0) {
1885 SHARED_REGION_TRACE_INFO(
1886 ("shared_region: %p [%d(%s)] map(): "
1887 "no mappings\n",
1888 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1889 proc_getpid(p), p->p_comm));
1890 kr = 0; /* no mappings: we're done! */
1891 goto done;
1892 } else if (mappings_count <= SFM_MAX) {
1893 mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
1894 if (mappings == NULL) {
1895 kr = KERN_RESOURCE_SHORTAGE;
1896 goto done;
1897 }
1898 } else {
1899 SHARED_REGION_TRACE_ERROR(
1900 ("shared_region: %p [%d(%s)] map(): "
1901 "too many mappings (%d) max %d\n",
1902 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1903 proc_getpid(p), p->p_comm,
1904 mappings_count, SFM_MAX));
1905 kr = KERN_FAILURE;
1906 goto done;
1907 }
1908
1909 /*
1910 * struct shared_file_np does not have fields that are subject to
1911 * sanitization, it is thus copied from userspace as is.
1912 */
1913 kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
1914 if (kr != KERN_SUCCESS) {
1915 goto done;
1916 }
1917
1918 kr = shared_region_map_and_slide_2_np_sanitize(
1919 p,
1920 uap->mappings_u,
1921 mappings_count,
1922 mappings);
1923 if (__improbable(kr != KERN_SUCCESS)) {
1924 kr = vm_sanitize_get_kr(kr);
1925 goto done;
1926 }
1927
1928 uint32_t max_slide = shared_files[0].sf_slide;
1929 uint32_t random_val;
1930 uint32_t slide_amount;
1931
1932 if (max_slide != 0) {
1933 read_random(&random_val, sizeof random_val);
1934 slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
1935 } else {
1936 slide_amount = 0;
1937 }
1938 #if DEVELOPMENT || DEBUG
1939 extern bool bootarg_disable_aslr;
1940 if (bootarg_disable_aslr) {
1941 slide_amount = 0;
1942 }
1943 #endif /* DEVELOPMENT || DEBUG */
1944
1945 /*
1946 * Fix up the mappings to reflect the desired slide.
1947 */
1948 unsigned int f;
1949 unsigned int m = 0;
1950 unsigned int i;
1951 for (f = 0; f < files_count; ++f) {
1952 shared_files[f].sf_slide = slide_amount;
1953 for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
1954 if (m >= mappings_count) {
1955 SHARED_REGION_TRACE_ERROR(
1956 ("shared_region: %p [%d(%s)] map(): "
1957 "mapping count argument was too small\n",
1958 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1959 proc_getpid(p), p->p_comm));
1960 kr = KERN_FAILURE;
1961 goto done;
1962 }
1963 if (__improbable(
1964 os_add_overflow(
1965 mappings[m].sms_address,
1966 slide_amount,
1967 &mappings[m].sms_address))) {
1968 kr = KERN_INVALID_ARGUMENT;
1969 goto done;
1970 }
1971 if (mappings[m].sms_slide_size != 0) {
1972 mach_vm_address_t discard;
1973 /* Slide and check that new start/size pairs do not overflow. */
1974 if (__improbable(
1975 os_add_overflow(
1976 mappings[m].sms_slide_start,
1977 slide_amount,
1978 &mappings[m].sms_slide_start) ||
1979 os_add_overflow(
1980 mappings[m].sms_slide_start,
1981 mappings[m].sms_slide_size,
1982 &discard))) {
1983 kr = KERN_INVALID_ARGUMENT;
1984 goto done;
1985 }
1986 }
1987 }
1988 }
1989
1990 kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
1991 done:
1992 kfree_data(shared_files, files_count * sizeof(shared_files[0]));
1993 kfree_data(mappings, mappings_count * sizeof(mappings[0]));
1994 return kr;
1995 }
1996
1997 /*
1998 * A syscall for dyld to use to map data pages that need load time relocation fixups.
1999 * The fixups are performed by a custom pager during page-in, so the pages still appear
2000 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
2001 * on demand later, all w/o using the compressor.
2002 *
2003 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
2004 * running, they are COW'd as normal.
2005 */
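/*
 * Hypothetical caller-side sketch (not dyld's actual code; the stub
 * name and the helper are assumptions): all regions must refer to the
 * same fd, and the link info blob must start with a struct mwl_info_hdr.
 *
 *	struct mwl_region regions[2];
 *	// ... fill in mwlr_fd (identical in both), mwlr_file_offset,
 *	// mwlr_address, mwlr_size, mwlr_protections ...
 *	void *info = build_link_info();   // hypothetical helper
 *	int err = __map_with_linking_np(regions, 2, info, info_size);
 */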
2006 int
2007 map_with_linking_np(
2008 struct proc *p,
2009 struct map_with_linking_np_args *uap,
2010 __unused int *retvalp)
2011 {
2012 uint32_t region_count;
2013 uint32_t r;
2014 struct mwl_region *regions = NULL;
2015 struct mwl_region *rp;
2016 uint32_t link_info_size;
2017 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
2018 struct mwl_info_hdr *info_hdr = NULL;
2019 uint64_t binds_size;
2020 int fd;
2021 struct fileproc *fp = NULL;
2022 struct vnode *vp = NULL;
2023 size_t file_size;
2024 off_t fs;
2025 struct vnode_attr va;
2026 memory_object_control_t file_control = NULL;
2027 int error;
2028 kern_return_t kr = KERN_SUCCESS;
2029
2030 /*
2031 * Check if dyld has told us it finished with this call.
2032 */
2033 if (p->p_disallow_map_with_linking) {
2034 printf("%s: [%d(%s)]: map__with_linking() was disabled\n",
2035 __func__, proc_getpid(p), p->p_comm);
2036 kr = KERN_FAILURE;
2037 goto done;
2038 }
2039
2040 /*
2041 * First we do some sanity checking on what dyld has passed us.
2042 */
2043 region_count = uap->region_count;
2044 link_info_size = uap->link_info_size;
2045 if (region_count == 0) {
2046 printf("%s: [%d(%s)]: region_count == 0\n",
2047 __func__, proc_getpid(p), p->p_comm);
2048 kr = KERN_FAILURE;
2049 goto done;
2050 }
2051 if (region_count > MWL_MAX_REGION_COUNT) {
2052 printf("%s: [%d(%s)]: region_count too big %d\n",
2053 __func__, proc_getpid(p), p->p_comm, region_count);
2054 kr = KERN_FAILURE;
2055 goto done;
2056 }
2057
2058 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
2059 printf("%s: [%d(%s)]: link_info_size too small\n",
2060 __func__, proc_getpid(p), p->p_comm);
2061 kr = KERN_FAILURE;
2062 goto done;
2063 }
2064 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
2065 printf("%s: [%d(%s)]: link_info_size too big %d\n",
2066 __func__, proc_getpid(p), p->p_comm, link_info_size);
2067 kr = KERN_FAILURE;
2068 goto done;
2069 }
2070
2071 /*
2072 * Allocate and copyin the regions and link info
2073 */
2074 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
2075 if (regions == NULL) {
2076 printf("%s: [%d(%s)]: failed to allocate regions\n",
2077 __func__, proc_getpid(p), p->p_comm);
2078 kr = KERN_RESOURCE_SHORTAGE;
2079 goto done;
2080 }
2081 kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
2082 if (kr != KERN_SUCCESS) {
2083 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
2084 __func__, proc_getpid(p), p->p_comm, kr);
2085 goto done;
2086 }
2087
2088 link_info = kalloc_data(link_info_size, Z_WAITOK);
2089 if (link_info == NULL) {
2090 printf("%s: [%d(%s)]: failed to allocate link_info\n",
2091 __func__, proc_getpid(p), p->p_comm);
2092 kr = KERN_RESOURCE_SHORTAGE;
2093 goto done;
2094 }
2095 kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
2096 if (kr != KERN_SUCCESS) {
2097 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
2098 __func__, proc_getpid(p), p->p_comm, kr);
2099 goto done;
2100 }
2101
2102 /*
2103 * Do some verification of the data structures.
2104 */
2105 info_hdr = (struct mwl_info_hdr *)link_info;
2106 if (info_hdr->mwli_version != MWL_INFO_VERS) {
2107 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
2108 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
2109 kr = KERN_FAILURE;
2110 goto done;
2111 }
2112
2113 if (info_hdr->mwli_binds_offset > link_info_size) {
2114 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
2115 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
2116 kr = KERN_FAILURE;
2117 goto done;
2118 }
2119
2120 /* some older devices have s/w page size > h/w page size, no need to support them */
2121 if (info_hdr->mwli_page_size != PAGE_SIZE) {
2122 /* no printf, since this is expected on some devices */
2123 kr = KERN_INVALID_ARGUMENT;
2124 goto done;
2125 }
2126
2127 binds_size = (uint64_t)info_hdr->mwli_binds_count *
2128 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
2129 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
2130 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
2131 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
2132 kr = KERN_FAILURE;
2133 goto done;
2134 }
2135
2136 if (info_hdr->mwli_chains_offset > link_info_size) {
2137 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
2138 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
2139 kr = KERN_FAILURE;
2140 goto done;
2141 }
2142
2143
2144 /*
2145 * Ensure the chained starts fit within the link info and make sure the
2146 * segment info offsets are within bounds.
2147 */
2148 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
2149 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
2150 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
2151 kr = KERN_FAILURE;
2152 goto done;
2153 }
2154 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
2155 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
2156 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
2157 kr = KERN_FAILURE;
2158 goto done;
2159 }
2160
2161 /* Note that more verification of offsets is done in the pager itself */
2162
2163 /*
2164 * Ensure we've only been given one FD and verify valid protections.
2165 */
2166 fd = regions[0].mwlr_fd;
2167 for (r = 0; r < region_count; ++r) {
2168 if (regions[r].mwlr_fd != fd) {
2169 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
2170 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
2171 kr = KERN_FAILURE;
2172 goto done;
2173 }
2174
2175 /*
2176 * Only allow data mappings and not zero fill. Permit TPRO
2177 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
2178 */
2179 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
2180 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
2181 __func__, proc_getpid(p), p->p_comm);
2182 kr = KERN_FAILURE;
2183 goto done;
2184 }
2185 if (regions[r].mwlr_protections & VM_PROT_ZF) {
2186 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
2187 __func__, proc_getpid(p), p->p_comm, r);
2188 kr = KERN_FAILURE;
2189 goto done;
2190 }
2191 if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
2192 !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
2193 printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
2194 __func__, proc_getpid(p), p->p_comm, r);
2195 kr = KERN_FAILURE;
2196 goto done;
2197 }
2198 }
2199
2200
2201 /* get file structure from file descriptor */
2202 error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
2203 if (error) {
2204 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
2205 __func__, proc_getpid(p), p->p_comm, error);
2206 kr = KERN_FAILURE;
2207 goto done;
2208 }
2209
2210 /* We need at least read permission on the file */
2211 if (!(fp->fp_glob->fg_flag & FREAD)) {
2212 printf("%s: [%d(%s)]: not readable\n",
2213 __func__, proc_getpid(p), p->p_comm);
2214 kr = KERN_FAILURE;
2215 goto done;
2216 }
2217
2218 /* Get the vnode from file structure */
2219 vp = (struct vnode *)fp_get_data(fp);
2220 error = vnode_getwithref(vp);
2221 if (error) {
2222 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
2223 __func__, proc_getpid(p), p->p_comm, error);
2224 kr = KERN_FAILURE;
2225 vp = NULL; /* just to be sure */
2226 goto done;
2227 }
2228
2229 /* Make sure the vnode is a regular file */
2230 if (vp->v_type != VREG) {
2231 printf("%s: [%d(%s)]: vnode not VREG\n",
2232 __func__, proc_getpid(p), p->p_comm);
2233 kr = KERN_FAILURE;
2234 goto done;
2235 }
2236
2237 /* get vnode size */
2238 error = vnode_size(vp, &fs, vfs_context_current());
2239 if (error) {
2240 goto done;
2241 }
2242 file_size = fs;
2243
2244 /* get the file's memory object handle */
2245 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
2246 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
2247 printf("%s: [%d(%s)]: no memory object\n",
2248 __func__, proc_getpid(p), p->p_comm);
2249 kr = KERN_FAILURE;
2250 goto done;
2251 }
2252
2253 for (r = 0; r < region_count; ++r) {
2254 rp = &regions[r];
2255
2256 #if CONFIG_MACF
2257 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
2258 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
2259 fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
2260 if (error) {
2261 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
2262 __func__, proc_getpid(p), p->p_comm, r, error);
2263 kr = KERN_FAILURE;
2264 goto done;
2265 }
2266 #endif /* CONFIG_MACF */
2267
2268 /* check that the mappings are properly covered by code signatures */
2269 if (cs_system_enforcement()) {
2270 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
2271 printf("%s: [%d(%s)]: region %d, not code signed\n",
2272 __func__, proc_getpid(p), p->p_comm, r);
2273 kr = KERN_FAILURE;
2274 goto done;
2275 }
2276 }
2277 }
2278
2279 /* update the vnode's access time */
2280 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
2281 VATTR_INIT(&va);
2282 nanotime(&va.va_access_time);
2283 VATTR_SET_ACTIVE(&va, va_access_time);
2284 vnode_setattr(vp, &va, vfs_context_current());
2285 }
2286
2287 /* get the VM to do the work */
2288 kr = vm_map_with_linking(proc_task(p), regions, region_count, &link_info, link_info_size, file_control);
2289
2290 done:
2291 if (fp != NULL) {
2292 /* release the file descriptor */
2293 fp_drop(p, fd, fp, 0);
2294 }
2295 if (vp != NULL) {
2296 (void)vnode_put(vp);
2297 }
2298 if (regions != NULL) {
2299 kfree_data(regions, region_count * sizeof(regions[0]));
2300 }
2301 /* link_info was set to NULL if the pager took ownership of it, i.e. if things worked */
2302 if (link_info != NULL) {
2303 kfree_data(link_info, link_info_size);
2304 }
2305
2306 switch (kr) {
2307 case KERN_SUCCESS:
2308 return 0;
2309 case KERN_RESOURCE_SHORTAGE:
2310 return ENOMEM;
2311 default:
2312 return EINVAL;
2313 }
2314 }
2315
2316 #if DEBUG || DEVELOPMENT
2317 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
2318 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
2319 SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
2320 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
2321 #endif /* DEBUG || DEVELOPMENT */
2322
2323 /* sysctl overflow room */
2324
2325 SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
2326 (int *) &page_size, 0, "vm page size");
2327
2328 /* vm_page_free_target is provided as a makeshift solution for applications that want to
2329 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
2330 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
2331 extern unsigned int vm_page_free_target;
2332 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
2333 &vm_page_free_target, 0, "Pageout daemon free target");
2334
2335 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
2336 &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
2337
2338 static int
2339 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
2340 {
2341 #pragma unused(oidp, arg1, arg2)
2342 unsigned int page_free_wanted;
2343
2344 page_free_wanted = mach_vm_ctl_page_free_wanted();
2345 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
2346 }
2347 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
2348 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
2349 0, 0, vm_ctl_page_free_wanted, "I", "");
2350
2351 extern unsigned int vm_page_purgeable_count;
2352 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2353 &vm_page_purgeable_count, 0, "Purgeable page count");
2354
2355 extern unsigned int vm_page_purgeable_wired_count;
2356 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2357 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
2358
2359 extern unsigned int vm_page_kern_lpage_count;
2360 SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2361 &vm_page_kern_lpage_count, 0, "kernel used large pages");
2362
2363 SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
2364 SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
2365
2366 #if DEVELOPMENT || DEBUG
2367 #if __ARM_MIXED_PAGE_SIZE__
2368 static int vm_mixed_pagesize_supported = 1;
2369 #else
2370 static int vm_mixed_pagesize_supported = 0;
2371 #endif /*__ARM_MIXED_PAGE_SIZE__ */
2372 SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
2373 &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
2374
2375 SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
2376 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
2377
2378 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
2379 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
2380 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
2381 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
2382 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
2383 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
2384
2385 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2386 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
2387 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2388 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
2389 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2390 &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
2391 SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
2392 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
2393 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
2394 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
2395 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
2396 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
2397 #endif /* DEVELOPMENT || DEBUG */
2398
2399 extern int madvise_free_debug;
2400 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
2401 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
2402 extern int madvise_free_debug_sometimes;
2403 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
2404 &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
2405
2406 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
2407 &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
2408 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2409 &vm_page_stats_reusable.reusable_pages_success, "");
2410 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2411 &vm_page_stats_reusable.reusable_pages_failure, "");
2412 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2413 &vm_page_stats_reusable.reusable_pages_shared, "");
2414 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2415 &vm_page_stats_reusable.all_reusable_calls, "");
2416 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2417 &vm_page_stats_reusable.partial_reusable_calls, "");
2418 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2419 &vm_page_stats_reusable.reuse_pages_success, "");
2420 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2421 &vm_page_stats_reusable.reuse_pages_failure, "");
2422 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2423 &vm_page_stats_reusable.all_reuse_calls, "");
2424 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
2425 &vm_page_stats_reusable.partial_reuse_calls, "");
2426 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
2427 &vm_page_stats_reusable.can_reuse_success, "");
2428 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
2429 &vm_page_stats_reusable.can_reuse_failure, "");
2430 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
2431 &vm_page_stats_reusable.reusable_reclaimed, "");
2432 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
2433 &vm_page_stats_reusable.reusable_nonwritable, "");
2434 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2435 &vm_page_stats_reusable.reusable_shared, "");
2436 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
2437 &vm_page_stats_reusable.free_shared, "");
2438
2439
2440 extern unsigned int vm_page_free_count, vm_page_speculative_count;
2441 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
2442 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
2443
2444 extern unsigned int vm_page_cleaned_count;
2445 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
2446
2447 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
2448 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
2449 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
2450
2451 /* pageout counts */
2452 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
2453 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
2454
2455 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
2456 SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
2457 SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
2458 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
2459 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
2460 SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
2461
2462 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
2463 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
2464 SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
2465 SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
2466 extern unsigned int vm_page_realtime_count;
2467 SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
2468 extern int vm_pageout_protect_realtime;
2469 SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");
2470
2471 /* counts of pages prefaulted when entering a memory object */
2472 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
2473 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
2474 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
2475
2476 #if defined (__x86_64__)
2477 extern unsigned int vm_clump_promote_threshold;
2478 SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
2479 #if DEVELOPMENT || DEBUG
2480 extern unsigned long vm_clump_stats[];
2481 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
2482 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
2483 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
2484 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
2485 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
2486 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
2487 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
2488 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
2489 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
2490 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
2491 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
2492 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
2493 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
2494 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
2495 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
2496 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
2497 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
2498 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
2499 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
2500 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
2501 SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
2502 #endif /* if DEVELOPMENT || DEBUG */
2503 #endif /* #if defined (__x86_64__) */
2504
2505 #if CONFIG_SECLUDED_MEMORY
2506
2507 SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
2508 extern unsigned int vm_page_secluded_target;
2509 extern unsigned int vm_page_secluded_count;
2510 extern unsigned int vm_page_secluded_count_free;
2511 extern unsigned int vm_page_secluded_count_inuse;
2512 extern unsigned int vm_page_secluded_count_over_target;
2513 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
2514 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
2515 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
2516 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
2517 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
2518
2519 extern struct vm_page_secluded_data vm_page_secluded;
2520 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
2521 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
2522 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
2523 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
2524 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
2525 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
2526 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
2527 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
2528 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
2529
2530 #endif /* CONFIG_SECLUDED_MEMORY */
2531
2532 #pragma mark Deferred Reclaim
2533
2534 #if CONFIG_DEFERRED_RECLAIM
2535
2536 #if DEVELOPMENT || DEBUG
2537 /*
2538 * VM reclaim testing
2539 */
2540 extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
2541
2542 static int
2543 sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
2544 {
2545 #pragma unused(arg1, arg2)
2546 int error = EINVAL, pid = 0;
2547 /*
2548 * Only drain on write
2549 */
2550 error = sysctl_handle_int(oidp, &pid, 0, req);
2551 if (error || !req->newptr) {
2552 return error;
2553 }
2554
2555 bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
2556 if (success) {
2557 error = 0;
2558 }
2559
2560 return error;
2561 }
2562
2563 SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
2564 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
2565 &sysctl_vm_reclaim_drain_async_queue, "I", "");
2566
2567 static int
2568 sysctl_vm_reclaim_from_pid SYSCTL_HANDLER_ARGS
2569 {
2570 int error = EINVAL;
2571 pid_t pid;
2572 error = sysctl_handle_int(oidp, &pid, 0, req);
2573 /* Only reclaim on write */
2574 if (error || !req->newptr) {
2575 return error;
2576 }
2577 if (pid <= 0) {
2578 return EINVAL;
2579 }
2580 proc_t p = proc_find(pid);
2581 if (p == PROC_NULL) {
2582 return ESRCH;
2583 }
2584 task_t t = proc_task(p);
2585 if (t == TASK_NULL) {
2586 proc_rele(p);
2587 return ESRCH;
2588 }
2589 task_reference(t);
2590 proc_rele(p);
2591 vm_deferred_reclamation_reclaim_from_task_sync(t, UINT64_MAX);
2592 task_deallocate(t);
2593 return 0;
2594 }
2595
2596 SYSCTL_PROC(_vm, OID_AUTO, reclaim_from_pid,
2597 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
2598 &sysctl_vm_reclaim_from_pid, "I",
2599 "Drain the deferred reclamation buffer for a pid");
2600
2601 static int
2602 sysctl_vm_reclaim_drain_all_buffers SYSCTL_HANDLER_ARGS
2603 {
2604 /* Only reclaim on write */
2605 if (!req->newptr) {
2606 return EINVAL;
2607 }
2608 vm_deferred_reclamation_reclaim_all_memory(RECLAIM_OPTIONS_NONE);
2609 return 0;
2610 }
2611
2612 SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_all_buffers,
2613 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
2614 &sysctl_vm_reclaim_drain_all_buffers, "I",
2615 "Drain all system-wide deferred reclamation buffers");
2616
2617
2618 extern uint64_t vm_reclaim_max_threshold;
2619 extern uint64_t vm_reclaim_trim_divisor;
2620
2621 SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
2622 SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
2623 #endif /* DEVELOPMENT || DEBUG */
2624
2625 #endif /* CONFIG_DEFERRED_RECLAIM */
2626
2627 #include <kern/thread.h>
2628 #include <sys/user.h>
2629
2630 void vm_pageout_io_throttle(void);
2631
2632 void
2633 vm_pageout_io_throttle(void)
2634 {
2635 struct uthread *uthread = current_uthread();
2636
2637 /*
2638 * If this thread is marked as a low priority I/O type
2639 * and the I/O we issued while in this cleaning operation
2640 * collided with normal I/O operations, we'll delay
2641 * in order to mitigate the impact of this task on the
2642 * normal operation of the system.
2643 */
2644
2645 if (uthread->uu_lowpri_window) {
2646 throttle_lowpri_io(1);
2647 }
2648 }
2649
2650 int
2651 vm_pressure_monitor(
2652 __unused struct proc *p,
2653 struct vm_pressure_monitor_args *uap,
2654 int *retval)
2655 {
2656 kern_return_t kr;
2657 uint32_t pages_reclaimed;
2658 uint32_t pages_wanted;
2659
2660 kr = mach_vm_pressure_monitor(
2661 (boolean_t) uap->wait_for_pressure,
2662 uap->nsecs_monitored,
2663 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
2664 &pages_wanted);
2665
2666 switch (kr) {
2667 case KERN_SUCCESS:
2668 break;
2669 case KERN_ABORTED:
2670 return EINTR;
2671 default:
2672 return EINVAL;
2673 }
2674
2675 if (uap->pages_reclaimed) {
2676 if (copyout((void *)&pages_reclaimed,
2677 uap->pages_reclaimed,
2678 sizeof(pages_reclaimed)) != 0) {
2679 return EFAULT;
2680 }
2681 }
2682
2683 *retval = (int) pages_wanted;
2684 return 0;
2685 }
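/*
 * Caller-side sketch (assumes a userspace stub matching this syscall's
 * ABI; the stub's exact declaration is an assumption here):
 *
 *	uint32_t reclaimed = 0;
 *	int wanted = vm_pressure_monitor(1, 30, &reclaimed);
 *	// wanted    == pages the pageout daemon still wants
 *	// reclaimed == pages reclaimed over the monitored interval
 */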
2686
2687 int
2688 kas_info(struct proc *p,
2689 struct kas_info_args *uap,
2690 int *retval __unused)
2691 {
2692 #ifndef CONFIG_KAS_INFO
2693 (void)p;
2694 (void)uap;
2695 return ENOTSUP;
2696 #else /* CONFIG_KAS_INFO */
2697 int selector = uap->selector;
2698 user_addr_t valuep = uap->value;
2699 user_addr_t sizep = uap->size;
2700 user_size_t size, rsize;
2701 int error;
2702
2703 if (!kauth_cred_issuser(kauth_cred_get())) {
2704 return EPERM;
2705 }
2706
2707 #if CONFIG_MACF
2708 error = mac_system_check_kas_info(kauth_cred_get(), selector);
2709 if (error) {
2710 return error;
2711 }
2712 #endif
2713
2714 if (IS_64BIT_PROCESS(p)) {
2715 user64_size_t size64;
2716 error = copyin(sizep, &size64, sizeof(size64));
2717 size = (user_size_t)size64;
2718 } else {
2719 user32_size_t size32;
2720 error = copyin(sizep, &size32, sizeof(size32));
2721 size = (user_size_t)size32;
2722 }
2723 if (error) {
2724 return error;
2725 }
2726
2727 switch (selector) {
2728 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
2729 {
2730 uint64_t slide = vm_kernel_slide;
2731
2732 if (sizeof(slide) != size) {
2733 return EINVAL;
2734 }
2735
2736 error = copyout(&slide, valuep, sizeof(slide));
2737 if (error) {
2738 return error;
2739 }
2740 rsize = size;
2741 }
2742 break;
2743 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
2744 {
2745 uint32_t i;
2746 kernel_mach_header_t *mh = &_mh_execute_header;
2747 struct load_command *cmd;
2748 cmd = (struct load_command*) &mh[1];
2749 uint64_t *bases;
2750 rsize = mh->ncmds * sizeof(uint64_t);
2751
2752 /*
2753 * Return the size if no data was passed
2754 */
2755 if (valuep == 0) {
2756 break;
2757 }
2758
2759 if (rsize > size) {
2760 return EINVAL;
2761 }
2762
2763 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
2764
2765 for (i = 0; i < mh->ncmds; i++) {
2766 if (cmd->cmd == LC_SEGMENT_KERNEL) {
2767 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
2768 bases[i] = (uint64_t)sg->vmaddr;
2769 }
2770 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
2771 }
2772
2773 error = copyout(bases, valuep, rsize);
2774
2775 kfree_data(bases, rsize);
2776
2777 if (error) {
2778 return error;
2779 }
2780 }
2781 break;
2782 case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR:
2783 case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR:
2784 {
2785 #if CONFIG_SPTM
2786 const uint64_t slide =
2787 (selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide;
2788 #else
2789 const uint64_t slide = 0;
2790 #endif
2791
2792 if (sizeof(slide) != size) {
2793 return EINVAL;
2794 }
2795
2796 error = copyout(&slide, valuep, sizeof(slide));
2797 if (error) {
2798 return error;
2799 }
2800 rsize = size;
2801 }
2802 break;
2803 default:
2804 return EINVAL;
2805 }
2806
2807 if (IS_64BIT_PROCESS(p)) {
2808 user64_size_t size64 = (user64_size_t)rsize;
2809 error = copyout(&size64, sizep, sizeof(size64));
2810 } else {
2811 user32_size_t size32 = (user32_size_t)rsize;
2812 error = copyout(&size32, sizep, sizeof(size32));
2813 }
2814
2815 return error;
2816 #endif /* CONFIG_KAS_INFO */
2817 }
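/*
 * Root-only usage sketch (kas_info(2) is declared in <sys/kas_info.h>;
 * error handling elided):
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0) {
 *		printf("kernel text slide: 0x%llx\n", slide);
 *	}
 */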
2818
2819 #pragma clang diagnostic push
2820 #pragma clang diagnostic ignored "-Wcast-qual"
2821 #pragma clang diagnostic ignored "-Wunused-function"
2822
2823 static void
2824 asserts()
2825 {
2826 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
2827 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
2828 }
2829
2830 SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
2831 SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
2832 #pragma clang diagnostic pop
2833
2834 extern uint32_t vm_page_pages;
2835 SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
2836
2837 extern uint32_t vm_page_busy_absent_skipped;
2838 SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
2839
2840 extern uint32_t vm_page_upl_tainted;
2841 SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
2842
2843 extern uint32_t vm_page_iopl_tainted;
2844 SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
2845
2846 #if __arm64__ && (DEVELOPMENT || DEBUG)
2847 extern int vm_footprint_suspend_allowed;
2848 SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
2849
2850 extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
2851 static int
2852 sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
2853 {
2854 #pragma unused(oidp, arg1, arg2)
2855 int error = 0;
2856 int new_value;
2857
2858 if (req->newptr == USER_ADDR_NULL) {
2859 return 0;
2860 }
2861 error = SYSCTL_IN(req, &new_value, sizeof(int));
2862 if (error) {
2863 return error;
2864 }
2865 if (!vm_footprint_suspend_allowed) {
2866 if (new_value != 0) {
2867 /* suspends are not allowed... */
2868 return 0;
2869 }
2870 /* ... but let resumes proceed */
2871 }
2872 DTRACE_VM2(footprint_suspend,
2873 vm_map_t, current_map(),
2874 int, new_value);
2875
2876 pmap_footprint_suspend(current_map(), new_value);
2877
2878 return 0;
2879 }
2880 SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
2881 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
2882 0, 0, &sysctl_vm_footprint_suspend, "I", "");
2883 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
2884
2885 extern uint64_t vm_map_corpse_footprint_count;
2886 extern uint64_t vm_map_corpse_footprint_size_avg;
2887 extern uint64_t vm_map_corpse_footprint_size_max;
2888 extern uint64_t vm_map_corpse_footprint_full;
2889 extern uint64_t vm_map_corpse_footprint_no_buf;
2890 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
2891 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
2892 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
2893 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
2894 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
2895 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
2896 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
2897 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
2898 SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
2899 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
2900
2901 #if CODE_SIGNING_MONITOR
2902 extern uint64_t vm_cs_defer_to_csm;
2903 extern uint64_t vm_cs_defer_to_csm_not;
2904 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
2905 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
2906 SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
2907 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
2908 #endif /* CODE_SIGNING_MONITOR */
2909
2910 extern uint64_t shared_region_pager_copied;
2911 extern uint64_t shared_region_pager_slid;
2912 extern uint64_t shared_region_pager_slid_error;
2913 extern uint64_t shared_region_pager_reclaimed;
2914 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
2915 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
2916 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
2917 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
2918 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
2919 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
2920 SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
2921 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
2922 extern int shared_region_destroy_delay;
2923 SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
2924 CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
2925
2926 #if MACH_ASSERT
2927 extern int pmap_ledgers_panic_leeway;
2928 SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
2929 #endif /* MACH_ASSERT */
2930
2931
2932 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
2933 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
2934 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
2935 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
2936 extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
2937 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
2938 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
2939 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
2940 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
2941 extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
2942 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
2943 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
2944 extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
2945 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
2946 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
2947 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
2948 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
2949 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
2950 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
2951 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
2952 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
2953 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
2954 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
2955 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
2956 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
2957 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
2958 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
2959 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
2960 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
2961 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
2962 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
2963 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
2964 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
2965 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
2966 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
2967 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
2968 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
2969 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
2970 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
2971
2972 extern int vm_protect_privileged_from_untrusted;
2973 SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
2974 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
2975 extern uint64_t vm_copied_on_read;
2976 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
2977 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
2978
2979 extern int vm_shared_region_count;
2980 extern int vm_shared_region_peak;
2981 SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
2982 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
2983 SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
2984 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
2985 #if DEVELOPMENT || DEBUG
2986 extern unsigned int shared_region_pagers_resident_count;
2987 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
2988 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
2989 extern unsigned int shared_region_pagers_resident_peak;
2990 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
2991 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
2992 extern int shared_region_pager_count;
2993 SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
2994 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
2995 #if __has_feature(ptrauth_calls)
2996 extern int shared_region_key_count;
2997 SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
2998 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
2999 extern int vm_shared_region_reslide_count;
3000 SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
3001 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
3002 #endif /* __has_feature(ptrauth_calls) */
3003 #endif /* DEVELOPMENT || DEBUG */
3004
3005 #if MACH_ASSERT
3006 extern int debug4k_filter;
3007 SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
3008 extern int debug4k_panic_on_terminate;
3009 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
3010 extern int debug4k_panic_on_exception;
3011 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
3012 extern int debug4k_panic_on_misaligned_sharing;
3013 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
3014 #endif /* MACH_ASSERT */
3015
3016 extern uint64_t vm_map_set_size_limit_count;
3017 extern uint64_t vm_map_set_data_limit_count;
3018 extern uint64_t vm_map_enter_RLIMIT_AS_count;
3019 extern uint64_t vm_map_enter_RLIMIT_DATA_count;
3020 SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
3021 SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
3022 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
3023 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
3024
3025 extern uint64_t vm_fault_resilient_media_initiate;
3026 extern uint64_t vm_fault_resilient_media_retry;
3027 extern uint64_t vm_fault_resilient_media_proceed;
3028 extern uint64_t vm_fault_resilient_media_release;
3029 extern uint64_t vm_fault_resilient_media_abort1;
3030 extern uint64_t vm_fault_resilient_media_abort2;
3031 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
3032 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
3033 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
3034 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
3035 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
3036 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
3037 #if MACH_ASSERT
3038 extern int vm_fault_resilient_media_inject_error1_rate;
3039 extern int vm_fault_resilient_media_inject_error1;
3040 extern int vm_fault_resilient_media_inject_error2_rate;
3041 extern int vm_fault_resilient_media_inject_error2;
3042 extern int vm_fault_resilient_media_inject_error3_rate;
3043 extern int vm_fault_resilient_media_inject_error3;
3044 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
3045 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
3046 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
3047 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
3048 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
3049 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
3050 #endif /* MACH_ASSERT */
3051
3052 extern uint64_t pmap_query_page_info_retries;
3053 SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3054
3055 /*
3056 * A sysctl which causes all existing shared regions to become stale. They
3057 * will no longer be used by anything new and will be torn down as soon as
3058 * the last existing user exits. A write of a non-zero value causes that to happen.
3059 * This should only be used by launchd, so we check that this is initproc.
3060 */
3061 static int
3062 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
    unsigned int value = 0;
    int changed = 0;
    int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
    if (error || !changed) {
        return error;
    }
    if (current_proc() != initproc) {
        return EPERM;
    }

    vm_shared_region_pivot();

    return 0;
}

SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, shared_region_pivot, "I", "");
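
/*
 * Example (illustrative only, not part of this file): from userspace,
 * launchd would trigger the pivot with a plain integer write; any other
 * caller gets EPERM from the initproc check above.
 *
 *     int one = 1;
 *     if (sysctlbyname("vm.shared_region_pivot", NULL, NULL, &one, sizeof(one)) != 0) {
 *         perror("vm.shared_region_pivot");
 *     }
 */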

extern uint64_t vm_object_shadow_forced;
extern uint64_t vm_object_shadow_skipped;
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_forced, "");
SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_object_shadow_skipped, "");

SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vmtc_total, 0, "total text page corruptions detected");

#if DEBUG || DEVELOPMENT
/*
 * A sysctl that can be used to corrupt a text page with an illegal instruction.
 * Used for testing text page self healing.
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);
static int
corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
    uint64_t value = 0;
    int error = sysctl_handle_quad(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
        return 0;
    } else {
        return EINVAL;
    }
}

SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
    CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, corrupt_text_addr, "-", "");
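
/*
 * Example (hypothetical test usage): the new value is a quad carrying the
 * text address to corrupt, so a DEBUG/DEVELOPMENT test would write eight
 * bytes. CTLFLAG_MASKED keeps the OID out of default sysctl listings.
 *
 *     uint64_t addr = ...; // address of a mapped __TEXT page to corrupt
 *     sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, sizeof(addr));
 */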
#endif /* DEBUG || DEVELOPMENT */

#if CONFIG_MAP_RANGES
/*
 * vm.malloc_ranges
 *
 * Space-separated list of <left:right> hexadecimal address pairs.
 */
static int
vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
{
    vm_map_t map = current_map();
    struct mach_vm_range r1, r2;
    char str[20 * 4];
    int len;
    mach_vm_offset_t right_hole_max;

    if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
        return ENOENT;
    }
    if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
        return ENOENT;
    }

#if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
    right_hole_max = MACH_VM_JUMBO_ADDRESS;
#else /* !XNU_TARGET_OS_IOS || !EXTENDED_USER_VA_SUPPORT */
    right_hole_max = get_map_max(map);
#endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */

    len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
        r1.max_address, r2.min_address,
        r2.max_address, right_hole_max);

    return SYSCTL_OUT(req, str, len);
}

SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &vm_map_malloc_ranges, "A", "");
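
/*
 * Example (illustrative only): the handler emits two "left:right" hex pairs
 * describing the holes around the default and heap ranges, so a consumer
 * could fetch and parse the string roughly like this:
 *
 *     char str[128];
 *     size_t len = sizeof(str);
 *     if (sysctlbyname("vm.malloc_ranges", str, &len, NULL, 0) == 0) {
 *         unsigned long long l1, r1, l2, r2;
 *         sscanf(str, "0x%llx:0x%llx 0x%llx:0x%llx", &l1, &r1, &l2, &r2);
 *     }
 */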

#if DEBUG || DEVELOPMENT
static int
vm_map_user_range_default SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
    struct mach_vm_range range;

    if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
        != KERN_SUCCESS) {
        return EINVAL;
    }

    return SYSCTL_OUT(req, &range, sizeof(range));
}

static int
vm_map_user_range_heap SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
    struct mach_vm_range range;

    if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
        != KERN_SUCCESS) {
        return EINVAL;
    }

    return SYSCTL_OUT(req, &range, sizeof(range));
}

static int
vm_map_user_range_large_file SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
    struct mach_vm_range range;

    if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_LARGE_FILE, &range)
        != KERN_SUCCESS) {
        return EINVAL;
    }

    return SYSCTL_OUT(req, &range, sizeof(range));
}

/*
 * Sysctls that can be used to return ranges for the current VM map.
 * Used for testing VM ranges.
 */
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_large_file, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_map_user_range_large_file, "S,mach_vm_range", "");
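
/*
 * Example (hypothetical test usage): each of these OIDs copies out a raw
 * struct mach_vm_range for the calling process's own map:
 *
 *     struct mach_vm_range range;
 *     size_t len = sizeof(range);
 *     if (sysctlbyname("vm.vm_map_user_range_heap", &range, &len, NULL, 0) == 0 &&
 *         len == sizeof(range)) {
 *         printf("heap: 0x%llx-0x%llx\n", range.min_address, range.max_address);
 *     }
 */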

#endif /* DEBUG || DEVELOPMENT */
#endif /* CONFIG_MAP_RANGES */

extern uint64_t vm_map_range_overflows_count;
SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
extern boolean_t vm_map_range_overflows_log;
SYSCTL_INT(_vm, OID_AUTO, map_range_overflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");

extern uint64_t c_seg_filled_no_contention;
extern uint64_t c_seg_filled_contention;
extern clock_sec_t c_seg_filled_contention_sec_max;
extern clock_nsec_t c_seg_filled_contention_nsec_max;
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
#if (XNU_TARGET_OS_OSX && __arm64__)
extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
extern int c_process_major_yield_after; /* yield after moving ? segments */
extern uint64_t c_process_major_reports;
extern clock_sec_t c_process_major_max_sec;
extern clock_nsec_t c_process_major_max_nsec;
extern uint32_t c_process_major_peak_segcount;
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
#endif /* (XNU_TARGET_OS_OSX && __arm64__) */

#if DEVELOPMENT || DEBUG
extern int panic_object_not_alive;
SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if FBDP_DEBUG_OBJECT_NO_PAGER
extern int fbdp_no_panic;
SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

#if DEVELOPMENT || DEBUG

/* The largest possible single segment + its slots is (sizeof(c_segment_info) + C_SLOT_MAX_INDEX * sizeof(c_slot_info)), so this should be enough */
#define SYSCTL_SEG_BUF_SIZE (8 * 1024)

extern uint32_t c_segments_available;

struct sysctl_buf_header {
    uint32_t magic;
} __attribute__((packed));

/* This sysctl iterates over the populated c_segments and writes some info about each one and its slots.
 * Instead of doing everything here, it calls into vm_compressor.c. */
static int
sysctl_compressor_segments(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
    char *buf = kalloc_data(SYSCTL_SEG_BUF_SIZE, Z_WAITOK | Z_ZERO);
    if (!buf) {
        return ENOMEM;
    }
    size_t offset = 0;
    int error = 0;
    uint32_t segno = 0;
    /* 4-byte header identifying the version of the data format.
     * The magic should be bumped whenever c_segment_info or c_slot_info changes. */
    ((struct sysctl_buf_header *)buf)->magic = VM_C_SEGMENT_INFO_MAGIC;
    offset += sizeof(uint32_t);

    while (segno < c_segments_available) {
        size_t left_sz = SYSCTL_SEG_BUF_SIZE - offset;
        kern_return_t kr = vm_compressor_serialize_segment_debug_info(segno, buf + offset, &left_sz);
        if (kr == KERN_NO_SPACE) {
            /* failed to add another segment: push the current buffer out and try again */
            if (offset == 0) {
                error = EINVAL; /* an empty buffer still has no space; shouldn't really happen */
                goto out;
            }
            /* write out chunk */
            error = SYSCTL_OUT(req, buf, offset);
            if (error) {
                goto out;
            }
            offset = 0;
            bzero(buf, SYSCTL_SEG_BUF_SIZE); /* zero any reserved bits that are not going to be filled */
            /* don't increment segno; we need to retry serializing the current segment */
        } else if (kr != KERN_SUCCESS) {
            error = EINVAL;
            goto out;
        } else {
            offset += left_sz;
            ++segno;
        }
    }

    if (offset > 0) { /* write last chunk */
        error = SYSCTL_OUT(req, buf, offset);
    }

out:
    kfree_data(buf, SYSCTL_SEG_BUF_SIZE);
    return error;
}

SYSCTL_PROC(_vm, OID_AUTO, compressor_segments, CTLTYPE_STRUCT | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_compressor_segments, "S", "");
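
/*
 * Example (illustrative only): the stream begins with the 4-byte
 * VM_C_SEGMENT_INFO_MAGIC written above, followed by serialized segment
 * records. Since SYSCTL_OUT with a NULL old pointer only accounts for
 * length, a consumer can probe for the size and then fetch (keeping in
 * mind the segment population can shift between the two calls):
 *
 *     size_t len = 0;
 *     sysctlbyname("vm.compressor_segments", NULL, &len, NULL, 0);
 *     void *buf = malloc(len);
 *     if (buf != NULL &&
 *         sysctlbyname("vm.compressor_segments", buf, &len, NULL, 0) == 0) {
 *         // verify the leading magic, then walk the per-segment records
 *     }
 */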

extern uint32_t vm_compressor_fragmentation_level(void);

static int
sysctl_compressor_fragmentation_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
    uint32_t value = vm_compressor_fragmentation_level();
    return SYSCTL_OUT(req, &value, sizeof(value));
}

SYSCTL_PROC(_vm, OID_AUTO, compressor_fragmentation_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_compressor_fragmentation_level, "IU", "");

#define SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE (8 * 1024)

/* This sysctl iterates over all the entries of the vm_map of a given process and writes some info about the vm_object pointed to by each entry.
 * This can be used to map out where all of a process's pages live in the compressor.
 */
static int
sysctl_task_vm_objects_slotmap(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
{
    int error = 0;
    char *buf = NULL;
    proc_t p = PROC_NULL;
    task_t task = TASK_NULL;
    vm_map_t map = VM_MAP_NULL;
    __block size_t offset = 0;

    /* go from pid to proc to task to vm_map; see sysctl_procargsx() for another example of this progression */
    int *name = arg1;
    int namelen = arg2;
    if (namelen < 1) {
        return EINVAL;
    }
    int pid = name[0];
    p = proc_find(pid); /* this takes a reference on the proc */
    if (p == PROC_NULL) {
        return EINVAL;
    }
    task = proc_task(p);
    if (task == TASK_NULL) {
        proc_rele(p);
        return EINVAL;
    }
    /* take a task reference while the proc reference still pins the task */
    task_reference(task);
    proc_rele(p); /* drop the proc reference */
    p = PROC_NULL;
    /* convert the task reference into a map reference */
    map = get_task_map_reference(task);
    task_deallocate(task);

    if (map == VM_MAP_NULL) {
        return EINVAL; /* nothing allocated yet */
    }

    buf = kalloc_data(SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE, Z_WAITOK | Z_ZERO);
    if (!buf) {
        error = ENOMEM;
        goto out;
    }

    /* 4-byte header identifying the version of the data format.
     * The magic should be bumped whenever the serialized header or entry layout changes. */
    ((struct sysctl_buf_header *)buf)->magic = VM_MAP_ENTRY_INFO_MAGIC;
    offset += sizeof(uint32_t);

    kern_return_t (^write_header)(int) = ^kern_return_t (int nentries) {
        /* write the header; this happens only once at the beginning, so there should be enough space */
        assert(offset + sizeof(struct vm_map_info_hdr) < SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE);
        struct vm_map_info_hdr *out_hdr = (struct vm_map_info_hdr *)(buf + offset);
        out_hdr->vmi_nentries = nentries;
        offset += sizeof(struct vm_map_info_hdr);
        return KERN_SUCCESS;
    };

    kern_return_t (^write_entry)(void *) = ^kern_return_t (void *entry) {
        while (true) { /* try up to 2 times: first against the current buffer, then against a freshly flushed one */
            size_t left_sz = SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE - offset;
            kern_return_t kr = vm_map_dump_entry_and_compressor_pager(entry, buf + offset, &left_sz);
            if (kr == KERN_NO_SPACE) {
                /* failed to write anything: flush the current buffer and try again */
                if (offset == 0) {
                    return KERN_FAILURE; /* an empty buffer still has no space; shouldn't really happen */
                }
                /* write out chunk */
                int out_error = SYSCTL_OUT(req, buf, offset);
                if (out_error) {
                    return KERN_FAILURE;
                }
                offset = 0;
                bzero(buf, SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE); /* zero any reserved bits that are not going to be filled */
                continue; /* retry the entry dump with the cleaned buffer */
            } else if (kr != KERN_SUCCESS) {
                return kr;
            }
            offset += left_sz;
            break;
        }
        return KERN_SUCCESS;
    };

    /* this foreach first invokes the header callback with the number of entries, then the entry callback for
     * every entry; whenever the buffer fills up, it is flushed to the sysctl and reused */
    kern_return_t kr = vm_map_entries_foreach(map, write_header, write_entry);

    if (kr != KERN_SUCCESS) {
        error = EINVAL; /* don't report success if the walk failed partway */
        goto out;
    }

    if (offset > 0) { /* write last chunk */
        error = SYSCTL_OUT(req, buf, offset);
    }

out:
    if (buf != NULL) {
        kfree_data(buf, SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE);
    }
    if (map != VM_MAP_NULL) {
        vm_map_deallocate(map);
    }
    return error;
}

SYSCTL_PROC(_vm, OID_AUTO, task_vm_objects_slotmap, CTLTYPE_NODE | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_task_vm_objects_slotmap, "S", "");
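
/*
 * Example (hypothetical test usage): since this OID is a CTLTYPE_NODE, the
 * target pid travels as an extra name component, in the same way as
 * kern.procargs2:
 *
 *     int mib[CTL_MAXNAME];
 *     size_t miblen = CTL_MAXNAME;
 *     sysctlnametomib("vm.task_vm_objects_slotmap", mib, &miblen);
 *     mib[miblen] = getpid();
 *     size_t len = 0;
 *     sysctl(mib, (u_int)(miblen + 1), NULL, &len, NULL, 0); // probe size
 *     void *buf = malloc(len);
 *     sysctl(mib, (u_int)(miblen + 1), buf, &len, NULL, 0);  // fetch stream
 */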

#endif /* DEVELOPMENT || DEBUG */