/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <vm/vm_options.h>

#include <kern/ecc.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/sdt.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <machine/machine_routines.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/xattr.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/random.h>
#include <sys/code_signing.h>
#if NECP
#include <net/necp.h>
#endif /* NECP */
#if SKYWALK
#include <skywalk/os_channel.h>
#endif /* SKYWALK */

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <kern/host_statistics.h>

#include <vm/vm_map_internal.h>
#include <vm/vm_kern_xnu.h>
#include <vm/vm_pageout_xnu.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region_internal.h>

#include <vm/vm_dyld_pager_internal.h>
#include <vm/vm_protos_internal.h>
#include <vm/vm_compressor_info.h>      /* for c_segment_info */
#include <vm/vm_compressor_internal.h>
#include <vm/vm_compressor_xnu.h>       /* for vm_compressor_serialize_segment_debug_info() */
#include <vm/vm_object_xnu.h>           /* for vm_chead_select_t */
#include <vm/vm_memory_entry_xnu.h>
#include <vm/vm_iokit.h>
#include <vm/vm_reclaim_xnu.h>
#if HAS_MTE
#include <arm64/mte_xnu.h>
#include <vm/vm_compressor_xnu.h>
#include <vm/vm_mteinfo_internal.h>
#include <sys/ubc.h>                    /* for mach_to_bsd_errno() */
#endif /* HAS_MTE */

#include <sys/kern_memorystatus.h>
#include <sys/kern_memorystatus_freeze.h>
#include <sys/proc_internal.h>

#include <mach-o/fixup-chains.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

#include <kern/bits.h>

#if CONFIG_CSR
#include <sys/csr.h>
#endif /* CONFIG_CSR */
#include <sys/trust_caches.h>
#include <libkern/amfi/amfi.h>
#include <IOKit/IOBSD.h>

#if VM_MAP_DEBUG_APPLE_PROTECT
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if DEVELOPMENT || DEBUG

extern int vm_object_cache_evict_all(void);
static int
sysctl_vm_object_cache_evict SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, req)
	(void) vm_object_cache_evict_all();
	return 0;
}

SYSCTL_PROC(_vm, OID_AUTO, object_cache_evict, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_object_cache_evict, "I", "");
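
/*
 * Usage note (illustrative): this is a write-only trigger, so e.g.
 * "sysctl -w vm.object_cache_evict=1" forces a full eviction pass over
 * the VM object cache; the integer value written is ignored.
 */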

static int
sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	vm_offset_t kaddr;
	kern_return_t kr;
	int error = 0;
	int size = 0;

	error = sysctl_handle_int(oidp, &size, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
	    0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);

	if (kr == KERN_SUCCESS) {
		kmem_free(kernel_map, kaddr, size);
	}

	return error;
}

SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");
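
/*
 * Usage note (illustrative): writing a byte count, e.g.
 * "sysctl -w vm.kmem_alloc_contig=65536", attempts a physically contiguous
 * kernel allocation of that size and immediately frees it on success.
 * The allocation outcome is not reported back; only copyin errors from
 * the sysctl layer are returned to the caller.
 */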

extern int vm_region_footprint;
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");

static int
sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	kmem_gobj_stats stats = kmem_get_gobj_stats();

	return SYSCTL_OUT(req, &stats, sizeof(stats));
}

SYSCTL_PROC(_vm, OID_AUTO, kmem_gobj_stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");

#endif /* DEVELOPMENT || DEBUG */

static int
sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	value = task_self_region_footprint();
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}
	task_self_region_footprint_set(value);
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");

static int
sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;

	value = (1 << thread_self_region_page_shift());
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (value != 0 && value != 4096 && value != 16384) {
		return EINVAL;
	}

#if !__ARM_MIXED_PAGE_SIZE__
	if (value != vm_map_page_size(current_map())) {
		return EINVAL;
	}
#endif /* !__ARM_MIXED_PAGE_SIZE__ */

	thread_self_region_page_shift_set(bit_first(value));
	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
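
/*
 * Usage note (illustrative): the only page sizes accepted above are 0,
 * 4096 and 16384, and on kernels without __ARM_MIXED_PAGE_SIZE__ the value
 * must also match the current map's native page size. Writing 4096 is
 * intended to make subsequent region-info queries report data in 4K page
 * units for the calling thread.
 */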

static int
sysctl_vm_self_region_info_flags SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;
	int value;
	kern_return_t kr;

	value = task_self_region_info_flags();
	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	if (!req->newptr) {
		return 0;
	}

	error = SYSCTL_IN(req, &value, sizeof(int));
	if (error) {
		return error;
	}

	kr = task_self_region_info_flags_set(value);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}

	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, self_region_info_flags, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_info_flags, "I", "");


#if DEVELOPMENT || DEBUG
extern int panic_on_unsigned_execute;
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");

extern int vm_log_xnu_user_debug;
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
#endif /* DEVELOPMENT || DEBUG */

extern int vm_log_map_delete_permanent_prot_none;
SYSCTL_INT(_vm, OID_AUTO, log_map_delete_permanent_prot_none, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_map_delete_permanent_prot_none, 0, "");

extern int cs_executable_create_upl;
extern int cs_executable_wire;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");

extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");

#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

extern unsigned int vm_object_copy_delayed_paging_wait_disable;
EXPERIMENT_FACTOR_LEGACY_UINT(_vm, vm_object_copy_delayed_paging_wait_disable, &vm_object_copy_delayed_paging_wait_disable, FALSE, TRUE, "");

__enum_closed_decl(vm_submap_test_op, uint32_t, {
	vsto_make_submap = 1,   /* make submap from entries in current_map()
	                         * at start..end, offset ignored */
	vsto_remap_submap = 2,  /* map in current_map() at start..end,
	                         * from parent address submap_base_address
	                         * and submap address offset */
	vsto_end
});

static int
sysctl_vm_submap_test_ctl SYSCTL_HANDLER_ARGS
{
	int error;
	struct {
		vm_submap_test_op op;
		mach_vm_address_t submap_base_address;
		mach_vm_address_t start;
		mach_vm_address_t end;
		mach_vm_address_t offset;
	} args;
	if (req->newlen != sizeof(args)) {
		return EINVAL;
	}
	error = SYSCTL_IN(req, &args, sizeof(args));
	if (error) {
		return error;
	}

	switch (args.op) {
	case vsto_make_submap:
		vm_map_testing_make_sealed_submap(current_map(), args.start, args.end);
		break;
	case vsto_remap_submap:
		vm_map_testing_remap_submap(current_map(),
		    args.submap_base_address, args.start, args.end, args.offset);
		break;
	default:
		return EINVAL;
	}

	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, submap_test_ctl, CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0, &sysctl_vm_submap_test_ctl, "-", "");
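
/*
 * A minimal user-space sketch of driving this test control (illustrative
 * only; DEVELOPMENT || DEBUG kernels). The write payload must match the
 * kernel's argument struct exactly, including the implicit padding after
 * the 32-bit op field, or the handler rejects it with EINVAL:
 *
 *	struct {
 *		uint32_t op;                    // vm_submap_test_op value
 *		uint64_t submap_base_address;
 *		uint64_t start, end, offset;
 *	} args = { .op = 1, .start = base, .end = base + size };
 *	sysctlbyname("vm.submap_test_ctl", NULL, NULL, &args, sizeof(args));
 */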

#if __arm64__
/* These are meant to support the page table accounting unit test. */
extern unsigned int arm_hardware_page_size;
extern unsigned int arm_pt_desc_size;
extern unsigned int arm_pt_root_size;
extern unsigned int inuse_user_tteroot_count;
extern unsigned int inuse_kernel_tteroot_count;
extern unsigned int inuse_user_ttepages_count;
extern unsigned int inuse_kernel_ttepages_count;
extern unsigned int inuse_user_ptepages_count;
extern unsigned int inuse_kernel_ptepages_count;
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
#if !CONFIG_SPTM
extern unsigned int free_page_size_tt_count;
extern unsigned int free_tt_count;
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
#endif
#if DEVELOPMENT || DEBUG
extern unsigned long pmap_asid_flushes;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
extern unsigned long pmap_speculation_restrictions;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_speculation_restrictions, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_speculation_restrictions, "");
#endif
#endif /* __arm64__ */
#endif /* DEVELOPMENT || DEBUG */

SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;   /* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled) {
		value = proc_shadow_max();
	}

	return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
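
/*
 * Usage note (illustrative): the scan walks every process's VM map, so it
 * is gated off by default. Enable it first, then read the result:
 *	sysctl -w vm.vm_shadow_max_enabled=1
 *	sysctl vm.vm_shadow_max
 */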

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

#if PAGE_SLEEP_WITH_INHERITOR
#if DEVELOPMENT || DEBUG
extern uint32_t page_worker_table_size;
SYSCTL_INT(_vm, OID_AUTO, page_worker_table_size, CTLFLAG_RD | CTLFLAG_LOCKED, &page_worker_table_size, 0, "");
SCALABLE_COUNTER_DECLARE(page_worker_hash_collisions);
SYSCTL_SCALABLE_COUNTER(_vm, page_worker_hash_collisions, page_worker_hash_collisions, "");
SCALABLE_COUNTER_DECLARE(page_worker_inheritor_sleeps);
SYSCTL_SCALABLE_COUNTER(_vm, page_worker_inheritor_sleeps, page_worker_inheritor_sleeps, "");
#endif /* DEVELOPMENT || DEBUG */
#endif /* PAGE_SLEEP_WITH_INHERITOR */

#if COMPRESSOR_PAGEOUT_CHEADS_MAX_COUNT > 1
extern uint32_t vm_cheads;
extern vm_chead_select_t vm_chead_select;
extern boolean_t vm_chead_rehint;
#if DEVELOPMENT || DEBUG
SYSCTL_UINT(_vm, OID_AUTO, compressor_heads, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cheads, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, compressor_head_select, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_chead_select, 0, "");
SYSCTL_INT(_vm, OID_AUTO, compressor_head_rehint, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_chead_rehint, 0, "");
#endif /* DEVELOPMENT || DEBUG */
EXPERIMENT_FACTOR_UINT(compressor_heads, &vm_cheads, 1, COMPRESSOR_PAGEOUT_CHEADS_MAX_COUNT, "");
EXPERIMENT_FACTOR_UINT(compressor_head_select, &vm_chead_select, CSEL_MIN, CSEL_MAX, "");
EXPERIMENT_FACTOR_INT(compressor_head_rehint, &vm_chead_rehint, 0, 1, "");
#endif /* COMPRESSOR_PAGEOUT_CHEADS_MAX_COUNT > 1 */

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if DEVELOPMENT || DEBUG
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* DEVELOPMENT || DEBUG */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
}

/*
 * shared_region_unnest_logging: level of logging of unnesting events
 * 0	- no logging
 * 1	- throttled logging of unexpected unnesting events (default)
 * 2	- unthrottled logging of unexpected unnesting events
 * 3+	- unthrottled logging of all unnesting events
 */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");
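
/*
 * Usage note (illustrative): e.g. "sysctl -w vm.shared_region_unnest_logging=3"
 * turns on unthrottled logging of every unnesting event, which can help when
 * debugging shared cache memory footprint regressions.
 */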

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;


#if XNU_TARGET_OS_OSX

#if defined (__x86_64__)
static int scdir_enforce = 1;
#else /* defined (__x86_64__) */
static int scdir_enforce = 0;   /* AOT caches live elsewhere */
#endif /* defined (__x86_64__) */

static char *scdir_path[] = {
	"/System/Library/dyld/",
	"/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
	"/System/Cryptexes/OS/System/Library/dyld",
	NULL
};

#else /* XNU_TARGET_OS_OSX */

static int scdir_enforce = 0;
static char *scdir_path[] = {
	"/System/Library/Caches/com.apple.dyld/",
	"/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	"/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
	NULL
};

#endif /* XNU_TARGET_OS_OSX */

static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
	NULL
};

#ifndef SECURE_KERNEL
static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
#if CONFIG_CSR
	if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
		printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
		return EPERM;
	}
#endif /* CONFIG_CSR */
	return sysctl_handle_int(oidp, arg1, arg2, req);
}

SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
#endif
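
/*
 * Note: on CONFIG_CSR kernels, changing vm.enforce_shared_cache_dir requires
 * SIP's filesystem restriction to be lifted (the csr_check above); otherwise
 * the write fails with EPERM.
 */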

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(
	vm_map_t        m,
	vm_map_offset_t s,
	vm_map_offset_t e,
	boolean_t       is_nested_map,
	vm_map_offset_t lowest_unnestable_addr)
{
	struct timeval tv;

	if (shared_region_unnest_logging == 0) {
		return;
	}

	if (shared_region_unnest_logging <= 2 &&
	    is_nested_map &&
	    s >= lowest_unnestable_addr) {
		/*
		 * Unnesting of writable map entries is fine.
		 */
		return;
	}

	if (shared_region_unnest_logging <= 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) <
		    vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ >
			    shared_region_unnest_log_count_threshold) {
				return;
			}
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	DTRACE_VM4(log_unnest_badness,
	    vm_map_t, m,
	    vm_map_offset_t, s,
	    vm_map_offset_t, e,
	    vm_map_offset_t, lowest_unnestable_addr);
	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}

uint64_t
vm_purge_filebacked_pagers(void)
{
	uint64_t pages_purged;

	pages_purged = 0;
	pages_purged += apple_protect_pager_purge_all();
	pages_purged += shared_region_pager_purge_all();
	pages_purged += dyld_pager_purge_all();
#if DEVELOPMENT || DEBUG
	printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
#endif /* DEVELOPMENT || DEBUG */
	return pages_purged;
}

int
useracc(
	user_addr_ut    addr_u,
	user_size_ut    len_u,
	int             prot)
{
	vm_map_t map;
	vm_prot_t vm_prot = VM_PROT_WRITE;

	map = current_map();

	if (prot == B_READ) {
		vm_prot = VM_PROT_READ;
	}

	return vm_map_check_protection(map, addr_u,
	           vm_sanitize_compute_ut_end(addr_u, len_u), vm_prot,
	           VM_SANITIZE_CALLER_USERACC);
}

#if XNU_PLATFORM_MacOSX
static __attribute__((always_inline, warn_unused_result))
kern_return_t
vslock_sanitize(
	vm_map_t                map,
	user_addr_ut            addr_u,
	user_size_ut            len_u,
	vm_sanitize_caller_t    vm_sanitize_caller,
	vm_map_offset_t         *start,
	vm_map_offset_t         *end,
	vm_map_size_t           *size)
{
	return vm_sanitize_addr_size(addr_u, len_u, vm_sanitize_caller,
	           map,
	           VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS, start, end,
	           size);
}
#endif /* XNU_PLATFORM_MacOSX */

int
vslock(user_addr_ut addr, user_size_ut len)
{
	kern_return_t kret;

#if XNU_PLATFORM_MacOSX
	/*
	 * Preserve previous behavior on macOS for overflows due to binary
	 * compatibility, i.e. return success for overflows without doing
	 * anything. Error compatibility returns VM_ERR_RETURN_NOW (on macOS)
	 * for overflow errors, which gets converted to KERN_SUCCESS by
	 * vm_sanitize_get_kr.
	 */
	vm_map_offset_t start, end;
	vm_map_size_t size;

	kret = vslock_sanitize(current_map(),
	    addr,
	    len,
	    VM_SANITIZE_CALLER_VSLOCK,
	    &start,
	    &end,
	    &size);
	if (__improbable(kret != KERN_SUCCESS)) {
		switch (vm_sanitize_get_kr(kret)) {
		case KERN_SUCCESS:
			return 0;
		case KERN_INVALID_ADDRESS:
		case KERN_NO_SPACE:
			return ENOMEM;
		case KERN_PROTECTION_FAILURE:
			return EACCES;
		default:
			return EINVAL;
		}
	}
#endif /* XNU_PLATFORM_MacOSX */

	kret = vm_map_wire_kernel(current_map(), addr,
	    vm_sanitize_compute_ut_end(addr, len),
	    vm_sanitize_wrap_prot(VM_PROT_READ | VM_PROT_WRITE),
	    VM_KERN_MEMORY_BSD,
	    FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}
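
/*
 * Typical in-kernel pairing of the wiring calls above (a hedged sketch;
 * error handling elided): wire the user range, access it directly, then
 * unwire it, passing dirtied=1 if the pages were written:
 *
 *	if (vslock(uaddr, ulen) == 0) {
 *		// ... safely access the wired user pages ...
 *		vsunlock(uaddr, ulen, 1);
 *	}
 */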

int
vsunlock(user_addr_ut addr, user_size_ut len, __unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t          pmap;
	vm_page_t       pg;
	vm_map_offset_t vaddr;
	ppnum_t         paddr;
#endif  /* FIXME ] */
	kern_return_t   kret;
	vm_map_t        map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		    vaddr < vm_map_round_page(addr + len, PAGE_MASK);
		    vaddr += PAGE_SIZE) {
			paddr = pmap_find_phys(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef  lint
	dirtied++;
#endif  /* lint */

#if XNU_PLATFORM_MacOSX
	/*
	 * Preserve previous behavior on macOS for overflows due to binary
	 * compatibility, i.e. return success for overflows without doing
	 * anything. Error compatibility returns VM_ERR_RETURN_NOW (on macOS)
	 * for overflow errors, which gets converted to KERN_SUCCESS by
	 * vm_sanitize_get_kr.
	 */
	vm_map_offset_t start, end;
	vm_map_size_t size;

	kret = vslock_sanitize(map,
	    addr,
	    len,
	    VM_SANITIZE_CALLER_VSUNLOCK,
	    &start,
	    &end,
	    &size);
	if (__improbable(kret != KERN_SUCCESS)) {
		switch (vm_sanitize_get_kr(kret)) {
		case KERN_SUCCESS:
			return 0;
		case KERN_INVALID_ADDRESS:
		case KERN_NO_SPACE:
			return ENOMEM;
		case KERN_PROTECTION_FAILURE:
			return EACCES;
		default:
			return EINVAL;
		}
	}
#endif /* XNU_PLATFORM_MacOSX */

	kret = vm_map_unwire(map, addr,
	    vm_sanitize_compute_ut_end(addr, len), false);
	switch (kret) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	default:
		return EINVAL;
	}
}

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}

int
fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char))) {
		return -1;
	}
	return byte;
}

int
fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char))) {
		return -1;
	}
	return byte;
}

int
suword(
	user_addr_t addr,
	long word)
{
	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}

long
fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int))) {
		return -1;
	}
	return word;
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}

long
fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int))) {
		return -1;
	}
	return word;
}
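
/*
 * Note: despite taking and returning "long", suword()/fuword() transfer
 * only sizeof(int) bytes, i.e. the low 32 bits on little-endian LP64
 * kernels. This matches the historical BSD semantics of these routines.
 */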

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
	} else {
		return suiword(addr, (long)word);
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
			return -1;
		}
		return longword;
	} else {
		return (int64_t)fuiword(addr);
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
	} else {
		return suiword(addr, (uint32_t)uword);
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
			return -1ULL;
		}
		return ulongword;
	} else {
		return (uint64_t)fuiword(addr);
	}
}
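
/*
 * Caveat: the -1 / -1ULL error returns of fuword()/fulong()/fuulong() are
 * indistinguishable from a legitimately stored value of -1. Callers that
 * need to tell the difference should use copyin() directly and check its
 * return code.
 */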

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return ENOTSUP;
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_and_slide_2_np()
 * system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * A NULL pointer argument can be used by dyld to indicate it has unmapped
 * the shared region. We will remove the shared_region reference from the task.
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc                    *p,
	struct shared_region_check_np_args      *uap,
	__unused int                            *retvalp)
{
	vm_shared_region_t      shared_region;
	mach_vm_offset_t        start_address = 0;
	int                     error = 0;
	kern_return_t           kr = KERN_FAILURE;
	task_t                  task = current_task();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address));

	/*
	 * Special value of start_address used to indicate that map_with_linking() should
	 * no longer be allowed in this process
	 */
	if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
		p->p_disallow_map_with_linking = TRUE;
		return 0;
	}

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(task);
	if (shared_region != NULL) {
		/*
		 * A NULL argument is used by dyld to indicate the task
		 * has unmapped its shared region.
		 */
		if (uap->start_address == 0) {
			/* unmap it first */
			vm_shared_region_remove(task, shared_region);
			vm_shared_region_set(task, NULL);
		} else {
			/* retrieve address of its first mapping... */
			kr = vm_shared_region_start_address(shared_region, &start_address);
			if (kr != KERN_SUCCESS) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_start_address() returned 0x%x\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address, kr));
				error = ENOMEM;
			}
			if (error == 0) {
				/* Insert the shared region submap and various bits of debug info into the task. */
				kr = vm_shared_region_update_task(task, shared_region, start_address);
				if (kr != KERN_SUCCESS) {
					SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
					    "check_np(0x%llx) "
					    "vm_shared_region_update_task() returned 0x%x\n",
					    (void *)VM_KERNEL_ADDRPERM(current_thread()),
					    proc_getpid(p), p->p_comm,
					    (uint64_t)uap->start_address, kr));

					error = ENOMEM;
				}
			}
#if __has_feature(ptrauth_calls)
			/*
			 * Remap any section of the shared library that
			 * has authenticated pointers into private memory.
			 */
			if ((error == 0) && (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS)) {
				SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
				    "check_np(0x%llx) "
				    "vm_shared_region_auth_remap() failed\n",
				    (void *)VM_KERNEL_ADDRPERM(current_thread()),
				    proc_getpid(p), p->p_comm,
				    (uint64_t)uap->start_address));
				error = ENOMEM;
			}
#endif /* __has_feature(ptrauth_calls) */
			/* Give the start address to the caller */
			if (error == 0) {
				error = copyout(&start_address,
				    (user_addr_t) uap->start_address,
				    sizeof(start_address));
				if (error != 0) {
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)] "
						"check_np(0x%llx) "
						"copyout(0x%llx) error %d\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						(uint64_t)uap->start_address, (uint64_t)start_address,
						error));
				}
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}


static int
shared_region_copyin(
	struct proc     *p,
	user_addr_t     user_addr,
	unsigned int    count,
	unsigned int    element_size,
	void            *kernel_data)
{
	int             error = 0;
	vm_size_t       size = count * element_size;

	error = copyin(user_addr, kernel_data, size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"copyin(0x%llx, %ld) failed (error=%d)\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			(uint64_t)user_addr, (long)size, error));
	}
	return error;
}

/*
 * A reasonable upper limit to prevent overflow of allocation/copyin.
 */
#define _SR_FILE_MAPPINGS_MAX_FILES 256

/* forward declaration */
__attribute__((noinline))
static void shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region);

/*
 * Setup part of _shared_region_map_and_slide().
 * It had to be broken out of _shared_region_map_and_slide() to
 * prevent compiler inlining from blowing out the stack.
 */
__attribute__((noinline))
static int
shared_region_map_and_slide_setup(
	struct proc                             *p,
	uint32_t                                files_count,
	struct shared_file_np                   *files,
	uint32_t                                mappings_count,
	struct shared_file_mapping_slide_np     *mappings,
	struct _sr_file_mappings                **sr_file_mappings,
	struct vm_shared_region                 **shared_region_ptr,
	struct vnode                            *rdir_vp)
{
	int                             error = 0;
	struct _sr_file_mappings        *srfmp;
	uint32_t                        mappings_next;
	struct vnode_attr               va;
	off_t                           fs;
#if CONFIG_MACF
	vm_prot_t                       maxprot = VM_PROT_ALL;
#endif
	uint32_t                        i;
	struct vm_shared_region         *shared_region = NULL;
	boolean_t                       is_driverkit = task_is_driver(current_task());

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map_and_slide_setup\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
		error = E2BIG;
		goto done;
	}
	if (files_count == 0) {
		error = EINVAL;
		goto done;
	}
	*sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
	    Z_WAITOK | Z_ZERO);
	if (*sr_file_mappings == NULL) {
		error = ENOMEM;
		goto done;
	}
	mappings_next = 0;
	for (i = 0; i < files_count; i++) {
		srfmp = &(*sr_file_mappings)[i];
		srfmp->fd = files[i].sf_fd;
		srfmp->mappings_count = files[i].sf_mappings_count;
		srfmp->mappings = &mappings[mappings_next];
		mappings_next += srfmp->mappings_count;
		if (mappings_next > mappings_count) {
			error = EINVAL;
			goto done;
		}
		srfmp->slide = files[i].sf_slide;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	*shared_region_ptr = shared_region;
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"no shared region\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		error = EINVAL;
		goto done;
	}

	/*
	 * Check the shared region matches the current root
	 * directory of this process.  Deny the mapping to
	 * avoid tainting the shared region with something that
	 * doesn't quite belong into it.
	 */
	struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
	if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: map(%p) root_dir mismatch\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread())));
		error = EPERM;
		goto done;
	}


	for (srfmp = &(*sr_file_mappings)[0];
	    srfmp < &(*sr_file_mappings)[files_count];
	    srfmp++) {
		if (srfmp->mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * A file descriptor of -1 is used to indicate that the data
		 * to be put in the shared region for this mapping comes directly
		 * from the process's address space. Ensure we have proper alignments.
		 */
		if (srfmp->fd == -1) {
			/* only allow one mapping per fd */
			if (srfmp->mappings_count > 1) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data >1 mapping\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				error = EINVAL;
				goto done;
			}

			/*
			 * The destination address and size must be page aligned.
			 */
			struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
			mach_vm_address_t dest_addr = mapping->sms_address;
			mach_vm_size_t map_size = mapping->sms_size;
			if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, dest_addr));
				error = EINVAL;
				goto done;
			}
			if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm, map_size));
				error = EINVAL;
				goto done;
			}
			continue;
		}

		/* get file structure from file descriptor */
		error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d lookup failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}

		/* we need at least read permission on the file */
		if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d not readable\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd));
			error = EPERM;
			goto done;
		}

		/* get vnode from file structure */
		error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map: "
				"fd=%d getwithref failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm, srfmp->fd, error));
			goto done;
		}
		srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);

		/* make sure the vnode is a regular file */
		if (srfmp->vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not a file (type=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, srfmp->vp->v_type));
			error = EINVAL;
			goto done;
		}

#if CONFIG_MACF
		/* pass in 0 for the offset argument because AMFI does not need the offset
		 * of the shared cache */
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
		if (error) {
			goto done;
		}
#endif /* MAC */

#if XNU_TARGET_OS_OSX && defined(__arm64__)
		/*
		 * Check if the shared cache is in the trust cache;
		 * if so, we can skip the root ownership check.
		 */
#if DEVELOPMENT || DEBUG
		/*
		 * Skip both root ownership and trust cache check if
		 * enforcement is disabled.
		 */
		if (!cs_system_enforcement()) {
			goto after_root_check;
		}
#endif /* DEVELOPMENT || DEBUG */
		struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
		if (blob == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing CS blob\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		const uint8_t *cdhash = csblob_get_cdhash(blob);
		if (cdhash == NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"missing cdhash\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}

		bool in_trust_cache = false;
		TrustCacheQueryToken_t qt;
		if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
			TCType_t tc_type = kTCTypeInvalid;
			TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
			in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
			    (tc_type == kTCTypeCryptex1BootOS ||
			    tc_type == kTCTypeStatic ||
			    tc_type == kTCTypeEngineering));
		}
		if (!in_trust_cache) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"not in trust cache\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			goto root_check;
		}
		goto after_root_check;
root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

		/* The shared cache file must be owned by root */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_getattr(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				error));
			goto done;
		}
		if (va.va_uid != 0) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"owned by uid=%d instead of 0\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name, va.va_uid));
			error = EPERM;
			goto done;
		}

#if XNU_TARGET_OS_OSX && defined(__arm64__)
after_root_check:
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_flags);
			error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"vnode_getattr(%p) failed (error=%d)\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					error));
				goto done;
			}

			if (!(va.va_flags & SF_RESTRICTED)) {
				/*
				 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
				 * the shared cache file is NOT SIP-protected, so reject the
				 * mapping request
				 */
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'), "
					"vnode is not SIP-protected. \n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				goto done;
			}
		}
#else /* CONFIG_CSR */

		/*
		 * Devices without SIP/ROSP need to make sure that the shared cache
		 * is either on the root volume or in the preboot cryptex volume.
		 */
		assert(rdir_vp != NULL);
		if (srfmp->vp->v_mount != rdir_vp->v_mount) {
			vnode_t preboot_vp = NULL;
#if XNU_TARGET_OS_OSX
#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
#else
#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
#endif
			error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
			if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"not on process' root volume nor preboot volume\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name));
				error = EPERM;
				if (preboot_vp) {
					(void)vnode_put(preboot_vp);
				}
				goto done;
			} else if (preboot_vp) {
				(void)vnode_put(preboot_vp);
			}
		}
#endif /* CONFIG_CSR */

		if (scdir_enforce) {
			char **expected_scdir_path;
			struct vnode *scdir_vp = NULL;
			for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
			    *expected_scdir_path != NULL;
			    expected_scdir_path++) {
				/* get vnode for expected_scdir_path */
				error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
				if (error) {
					SHARED_REGION_TRACE_ERROR(
						("shared_region: %p [%d(%s)]: "
						"vnode_lookup(%s) failed (error=%d)\n",
						(void *)VM_KERNEL_ADDRPERM(current_thread()),
						proc_getpid(p), p->p_comm,
						*expected_scdir_path, error));
					continue;
				}

				/* check if parent is scdir_vp */
				assert(scdir_vp != NULL);
				if (vnode_parent(srfmp->vp) == scdir_vp) {
					(void)vnode_put(scdir_vp);
					scdir_vp = NULL;
					goto scdir_ok;
				}
				(void)vnode_put(scdir_vp);
				scdir_vp = NULL;
			}
			/* nothing matches */
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"shared cache file not in expected directory\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EPERM;
			goto done;
		}
scdir_ok:

		/* get vnode size */
		error = vnode_size(srfmp->vp, &fs, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"vnode_size(%p) failed (error=%d)\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
			goto done;
		}
		srfmp->file_size = fs;

		/* get the file's memory object handle */
		srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
		if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				"no memory object\n",
				(void *)VM_KERNEL_ADDRPERM(current_thread()),
				proc_getpid(p), p->p_comm,
				(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
				srfmp->vp->v_name));
			error = EINVAL;
			goto done;
		}

		/* check that the mappings are properly covered by code signatures */
		if (!cs_system_enforcement()) {
			/* code signing is not enforced: no need to check */
		} else {
			for (i = 0; i < srfmp->mappings_count; i++) {
				if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
					/* zero-filled mapping: not backed by the file */
					continue;
				}
				if (ubc_cs_is_range_codesigned(srfmp->vp,
				    srfmp->mappings[i].sms_file_offset,
				    srfmp->mappings[i].sms_size)) {
					/* this mapping is fully covered by code signatures */
					continue;
				}
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(%p:'%s'): "
					"mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
					"is not code-signed\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm,
					(void *)VM_KERNEL_ADDRPERM(srfmp->vp),
					srfmp->vp->v_name,
					i, srfmp->mappings_count,
					srfmp->mappings[i].sms_address,
					srfmp->mappings[i].sms_size,
					srfmp->mappings[i].sms_file_offset,
					srfmp->mappings[i].sms_max_prot,
					srfmp->mappings[i].sms_init_prot));
				error = EINVAL;
				goto done;
			}
		}
	}
done:
	if (error != 0) {
		shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
		*sr_file_mappings = NULL;
		*shared_region_ptr = NULL;
	}
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] map_and_slide_setup <- %d\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm, error));
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
static int
_shared_region_map_and_slide(
	struct proc                             *p,
	uint32_t                                files_count,
	struct shared_file_np                   *files,
	uint32_t                                mappings_count,
	struct shared_file_mapping_slide_np     *mappings)
{
	int                             error = 0;
	kern_return_t                   kr = KERN_SUCCESS;
	struct _sr_file_mappings        *sr_file_mappings = NULL;
	struct vnode                    *rdir_vp = NULL;
	struct vm_shared_region         *shared_region = NULL;

	/*
	 * Get a reference to the current proc's root dir.
	 * Need this to prevent racing with chroot.
	 */
	proc_fdlock(p);
	rdir_vp = p->p_fd.fd_rdir;
	if (rdir_vp == NULL) {
		rdir_vp = rootvnode;
	}
	assert(rdir_vp != NULL);
	vnode_get(rdir_vp);
	proc_fdunlock(p);

	/*
	 * Turn files, mappings into sr_file_mappings and other setup.
	 */
	error = shared_region_map_and_slide_setup(p, files_count,
	    files, mappings_count, mappings,
	    &sr_file_mappings, &shared_region, rdir_vp);
	if (error != 0) {
		vnode_put(rdir_vp);
		return error;
	}

	/* map the file(s) into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
		    "vm_shared_region_map_file() failed kr=0x%x\n",
		    (void *)VM_KERNEL_ADDRPERM(current_thread()),
		    proc_getpid(p), p->p_comm, kr));
	}

	/* convert kern_return_t to errno */
	switch (kr) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
		error = EFAULT;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EPERM;
		break;
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_FAILURE:
	case KERN_INVALID_ARGUMENT:
	default:
		error = EINVAL;
		break;
	}

	/*
	 * Mark that this process is now using split libraries.
	 */
	if (error == 0 && (p->p_flag & P_NOSHLIB)) {
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

	vnode_put(rdir_vp);
	shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm));

	return error;
}
1725
/*
 * Clean up part of _shared_region_map_and_slide().
 * It had to be broken out of _shared_region_map_and_slide() to
 * prevent compiler inlining from blowing out the stack.
 */
__attribute__((noinline))
static void
shared_region_map_and_slide_cleanup(
	struct proc *p,
	uint32_t files_count,
	struct _sr_file_mappings *sr_file_mappings,
	struct vm_shared_region *shared_region)
{
	struct _sr_file_mappings *srfmp;
	struct vnode_attr va;

	if (sr_file_mappings != NULL) {
		for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
			if (srfmp->vp != NULL) {
				vnode_lock_spin(srfmp->vp);
				srfmp->vp->v_flag |= VSHARED_DYLD;
				vnode_unlock(srfmp->vp);

				/* update the vnode's access time */
				if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
					VATTR_INIT(&va);
					nanotime(&va.va_access_time);
					VATTR_SET_ACTIVE(&va, va_access_time);
					vnode_setattr(srfmp->vp, &va, vfs_context_current());
				}

#if NAMEDSTREAMS
				/*
				 * If the shared cache is compressed, it may
				 * have a namedstream vnode instantiated
				 * for it. That namedstream vnode will also
				 * have to be marked with VSHARED_DYLD.
				 */
				if (vnode_hasnamedstreams(srfmp->vp)) {
					vnode_t svp;
					if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
					    NS_OPEN, 0, vfs_context_kernel()) == 0) {
						vnode_lock_spin(svp);
						svp->v_flag |= VSHARED_DYLD;
						vnode_unlock(svp);
						vnode_put(svp);
					}
				}
#endif /* NAMEDSTREAMS */
				/*
				 * release the vnode...
				 * ubc_map() still holds it for us in the non-error case
				 */
				(void) vnode_put(srfmp->vp);
				srfmp->vp = NULL;
			}
			if (srfmp->fp != NULL) {
				/* release the file descriptor */
				fp_drop(p, srfmp->fd, srfmp->fp, 0);
				srfmp->fp = NULL;
			}
		}
		kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}
}

/*
 * For each file mapped, we may have mappings for:
 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
 * so let's round up to 8 mappings per file.
 */
#define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */

/*
 * This is the new interface for setting up shared region mappings.
 *
 * The slide used for shared regions setup using this interface is done differently
 * from the old interface. The slide value passed in the shared_files_np represents
 * a max value. The kernel will choose a random value based on that, then use it
 * for all shared regions.
 */
#if defined (__x86_64__)
#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
#else
#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
#endif
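/*
 * For illustration, a worked example of how the mask constrains the chosen
 * slide (the numbers are hypothetical, not taken from any device): on a
 * 16K-page configuration SLIDE_AMOUNT_MASK is ~0x3FFF, so for a
 * dyld-supplied max slide of 0x10000000 and a random draw of 0x1234ABCD:
 *
 *	slide_amount = (0x1234ABCD % 0x10000000) & ~0x3FFF
 *	             = 0x0234ABCD & 0xFFFFC000
 *	             = 0x02348000		(page-aligned, below the max)
 */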

static inline __result_use_check kern_return_t
shared_region_map_and_slide_2_np_sanitize(
	struct proc *p,
	user_addr_t mappings_userspace_addr,
	unsigned int count,
	shared_file_mapping_slide_np_t *mappings)
{
	kern_return_t kr;
	vm_map_t map = current_map();
	mach_vm_address_t addr, end;
	mach_vm_offset_t offset, offset_end;
	mach_vm_size_t size, offset_size;
	user_addr_t slide_start, slide_end, slide_size;
	vm_prot_t cur;
	vm_prot_t max;

	user_addr_t user_addr = mappings_userspace_addr;

	for (size_t i = 0; i < count; i++) {
		shared_file_mapping_slide_np_ut mapping_u;
		/*
		 * First we bring each mapping struct into our kernel stack to
		 * avoid TOCTOU.
		 */
		kr = shared_region_copyin(
			p,
			user_addr,
			1, // copy 1 element at a time
			sizeof(shared_file_mapping_slide_np_ut),
			&mapping_u);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}

		/*
		 * Then, we sanitize the data on the kernel stack.
		 */
		kr = vm_sanitize_addr_size(
			mapping_u.sms_address_u,
			mapping_u.sms_size_u,
			VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
			map,
			(VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
			| VM_SANITIZE_FLAGS_CHECK_ALIGNED_START
			| VM_SANITIZE_FLAGS_CHECK_ALIGNED_SIZE),
			&addr,
			&end,
			&size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}

		kr = vm_sanitize_addr_size(
			mapping_u.sms_file_offset_u,
			mapping_u.sms_size_u,
			VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
			PAGE_MASK,
			(VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
			| VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES),
			&offset,
			&offset_end,
			&offset_size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}
		if (__improbable(0 != (offset & vm_map_page_mask(map)))) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Unsafe access is immediately followed by wrap to
		 * convert from addr to size.
		 */
		mach_vm_size_ut sms_slide_size_u =
		    vm_sanitize_wrap_size(
			VM_SANITIZE_UNSAFE_UNWRAP(
				mapping_u.sms_slide_size_u));

		kr = vm_sanitize_addr_size(
			mapping_u.sms_slide_start_u,
			sms_slide_size_u,
			VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
			map,
			(VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH
			| VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES),
			&slide_start,
			&slide_end,
			&slide_size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}

		kr = vm_sanitize_cur_and_max_prots(
			mapping_u.sms_init_prot_u,
			mapping_u.sms_max_prot_u,
			VM_SANITIZE_CALLER_SHARED_REGION_MAP_AND_SLIDE_2_NP,
			map,
			VM_PROT_SFM_EXTENSIONS_MASK | VM_PROT_TPRO,
			&cur,
			&max);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}

		/*
		 * Finally, we move the data from the kernel stack to our
		 * caller-allocated kernel heap buffer.
		 */
		mappings[i].sms_address = addr;
		mappings[i].sms_size = size;
		mappings[i].sms_file_offset = offset;
		mappings[i].sms_slide_size = slide_size;
		mappings[i].sms_slide_start = slide_start;
		mappings[i].sms_max_prot = max;
		mappings[i].sms_init_prot = cur;

		if (__improbable(os_add_overflow(
			user_addr,
			sizeof(shared_file_mapping_slide_np_ut),
			&user_addr))) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	return KERN_SUCCESS;
}

int
shared_region_map_and_slide_2_np(
	struct proc *p,
	struct shared_region_map_and_slide_2_np_args *uap,
	__unused int *retvalp)
{
	unsigned int files_count;
	struct shared_file_np *shared_files = NULL;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings = NULL;
	kern_return_t kr = KERN_SUCCESS;

	files_count = uap->files_count;
	mappings_count = uap->mappings_count;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map_and_slide(0x%llx)\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->mappings_u));

	if (files_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no files\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no files to map: we're done ! */
		goto done;
	} else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
		shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
		if (shared_files == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many files (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			files_count, _SR_FILE_MAPPINGS_MAX_FILES));
		kr = KERN_FAILURE;
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			"no mappings\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX) {
		mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
		if (mappings == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto done;
		}
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			"too many mappings (%d) max %d\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm,
			mappings_count, SFM_MAX));
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * struct shared_file_np does not have fields that are subject to
	 * sanitization, so it is copied from userspace as is.
	 */
	kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] copyin() returned 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm, kr));
		goto done;
	}

	kr = shared_region_map_and_slide_2_np_sanitize(
		p,
		uap->mappings_u,
		mappings_count,
		mappings);
	if (__improbable(kr != KERN_SUCCESS)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] sanitize() returned 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(current_thread()),
			proc_getpid(p), p->p_comm, kr));
		kr = vm_sanitize_get_kr(kr);
		goto done;
	}

	uint32_t max_slide = shared_files[0].sf_slide;
	uint32_t random_val;
	uint32_t slide_amount;

	if (max_slide != 0) {
		read_random(&random_val, sizeof random_val);
		slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
	} else {
		slide_amount = 0;
	}
#if DEVELOPMENT || DEBUG
	extern bool bootarg_disable_aslr;
	if (bootarg_disable_aslr) {
		slide_amount = 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Fix up the mappings to reflect the desired slide.
	 */
	unsigned int f;
	unsigned int m = 0;
	unsigned int i;
	for (f = 0; f < files_count; ++f) {
		shared_files[f].sf_slide = slide_amount;
		for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
			if (m >= mappings_count) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] map(): "
					"mapping count argument was too small\n",
					(void *)VM_KERNEL_ADDRPERM(current_thread()),
					proc_getpid(p), p->p_comm));
				kr = KERN_FAILURE;
				goto done;
			}
			if (__improbable(
				    os_add_overflow(
					    mappings[m].sms_address,
					    slide_amount,
					    &mappings[m].sms_address))) {
				kr = KERN_INVALID_ARGUMENT;
				goto done;
			}
			if (mappings[m].sms_slide_size != 0) {
				mach_vm_address_t discard;
				/* Slide and check that new start/size pairs do not overflow. */
				if (__improbable(
					    os_add_overflow(
						    mappings[m].sms_slide_start,
						    slide_amount,
						    &mappings[m].sms_slide_start) ||
					    os_add_overflow(
						    mappings[m].sms_slide_start,
						    mappings[m].sms_slide_size,
						    &discard))) {
					kr = KERN_INVALID_ARGUMENT;
					goto done;
				}
			}
		}
	}

	kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
done:
	kfree_data(shared_files, files_count * sizeof(shared_files[0]));
	kfree_data(mappings, mappings_count * sizeof(mappings[0]));

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] map_and_slide(0x%llx) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(current_thread()),
		proc_getpid(p), p->p_comm,
		(uint64_t)uap->mappings_u, kr));

	return kr;
}


SYSCTL_QUAD(_vm, OID_AUTO, vmwls_total_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_total_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_total_fail, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_total_fail, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_overflow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_overflow, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_bad_offset, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_bad_offset, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_bad_addr, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_bad_addr, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_bad_prot, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_bad_prot, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_bad_file, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_bad_file, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_bad_shadows, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_bad_shadows, "");
SYSCTL_QUAD(_vm, OID_AUTO, vmwls_bad_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_with_linking_stats.vmwls_bad_cow, "");
/*
 * A syscall for dyld to use to map data pages that need load-time relocation fixups.
 * The fixups are performed by a custom pager during page-in, so the pages still appear
 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
 * on demand later, all w/o using the compressor.
 *
 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages
 * while running, they are COW'd as normal.
 */
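/*
 * A hedged sketch of the expected calling pattern (only the region fields
 * validated below -- mwlr_fd, mwlr_protections, mwlr_file_offset, mwlr_size --
 * are referenced; the userspace wrapper shown is illustrative, not a quoted
 * prototype):
 *
 *	struct mwl_region regions[n];	// every region refers to the same fd
 *	// link_info starts with a struct mwl_info_hdr whose mwli_version,
 *	// mwli_page_size, mwli_binds_* and mwli_chains_* fields must pass
 *	// the checks performed in this function
 *	err = map_with_linking_np(regions, n, link_info, link_info_size);
 *
 * dyld is expected to use this early in process setup and then disable it
 * for the rest of the process lifetime (see p_disallow_map_with_linking).
 */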
int
map_with_linking_np(
	struct proc *p,
	struct map_with_linking_np_args *uap,
	__unused int *retvalp)
{
	uint32_t region_count;
	uint32_t r;
	struct mwl_region *regions = NULL;
	struct mwl_region *rp;
	uint32_t link_info_size;
	void *link_info = NULL; /* starts with a struct mwl_info_hdr */
	struct mwl_info_hdr *info_hdr = NULL;
	uint64_t binds_size;
	int fd;
	struct fileproc *fp = NULL;
	struct vnode *vp = NULL;
	size_t file_size;
	off_t fs;
	struct vnode_attr va;
	memory_object_control_t file_control = NULL;
	int error;
	kern_return_t kr = KERN_SUCCESS;

	/*
	 * Check if dyld has told us it finished with this call.
	 */
	if (p->p_disallow_map_with_linking) {
		printf("%s: [%d(%s)]: map_with_linking() was disabled\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * First we do some sanity checking on what dyld has passed us.
	 */
	region_count = uap->region_count;
	link_info_size = uap->link_info_size;
	if (region_count == 0) {
		printf("%s: [%d(%s)]: region_count == 0\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}
	if (region_count > MWL_MAX_REGION_COUNT) {
		printf("%s: [%d(%s)]: region_count too big %d\n",
		    __func__, proc_getpid(p), p->p_comm, region_count);
		kr = KERN_FAILURE;
		goto done;
	}

	if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
		printf("%s: [%d(%s)]: link_info_size too small\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}
	if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
		printf("%s: [%d(%s)]: link_info_size too big %d\n",
		    __func__, proc_getpid(p), p->p_comm, link_info_size);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * Allocate and copyin the regions and link info
	 */
	regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
	if (regions == NULL) {
		printf("%s: [%d(%s)]: failed to allocate regions\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}
	kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
	if (kr != KERN_SUCCESS) {
		printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
		    __func__, proc_getpid(p), p->p_comm, kr);
		goto done;
	}

	link_info = kalloc_data(link_info_size, Z_WAITOK);
	if (link_info == NULL) {
		printf("%s: [%d(%s)]: failed to allocate link_info\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}
	kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
	if (kr != KERN_SUCCESS) {
		printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
		    __func__, proc_getpid(p), p->p_comm, kr);
		goto done;
	}

	/*
	 * Do some verification of the data structures.
	 */
	info_hdr = (struct mwl_info_hdr *)link_info;
	if (info_hdr->mwli_version != MWL_INFO_VERS) {
		printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
		kr = KERN_FAILURE;
		goto done;
	}

	if (info_hdr->mwli_binds_offset > link_info_size) {
		printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
		kr = KERN_FAILURE;
		goto done;
	}

	/* some older devices have a s/w page size > h/w page size; no need to support them */
	if (info_hdr->mwli_page_size != PAGE_SIZE) {
		/* no printf, since this is expected on some devices */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	binds_size = (uint64_t)info_hdr->mwli_binds_count *
	    ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
	if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
		printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
		kr = KERN_FAILURE;
		goto done;
	}

	if (info_hdr->mwli_chains_offset > link_info_size) {
		printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
		kr = KERN_FAILURE;
		goto done;
	}

	/*
	 * Ensure the chained-starts structure lies within the link info and
	 * that the segment info offsets are within bounds.
	 */
	if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
		printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
		kr = KERN_FAILURE;
		goto done;
	}
	if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
		printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
		    __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
		kr = KERN_FAILURE;
		goto done;
	}

	/* Note that more verification of offsets is done in the pager itself */

	/*
	 * Ensure we've only been given one FD and verify valid protections.
	 */
	fd = regions[0].mwlr_fd;
	for (r = 0; r < region_count; ++r) {
		if (regions[r].mwlr_fd != fd) {
			printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
			    __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * Only allow data mappings and not zero fill. Permit TPRO
		 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
		 */
		if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
			printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
			    __func__, proc_getpid(p), p->p_comm);
			kr = KERN_FAILURE;
			goto done;
		}
		if (regions[r].mwlr_protections & VM_PROT_ZF) {
			printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
			    __func__, proc_getpid(p), p->p_comm, r);
			kr = KERN_FAILURE;
			goto done;
		}
		if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
		    !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
			printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
			    __func__, proc_getpid(p), p->p_comm, r);
			kr = KERN_FAILURE;
			goto done;
		}
	}

	/* get file structure from file descriptor */
	error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
	if (error) {
		printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
		    __func__, proc_getpid(p), p->p_comm, error);
		kr = KERN_FAILURE;
		goto done;
	}

	/* We need at least read permission on the file */
	if (!(fp->fp_glob->fg_flag & FREAD)) {
		printf("%s: [%d(%s)]: not readable\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* Get the vnode from file structure */
	vp = (struct vnode *)fp_get_data(fp);
	error = vnode_getwithref(vp);
	if (error) {
		printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
		    __func__, proc_getpid(p), p->p_comm, error);
		kr = KERN_FAILURE;
		vp = NULL; /* just to be sure */
		goto done;
	}

	/* Make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		printf("%s: [%d(%s)]: vnode not VREG\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		printf("%s: [%d(%s)]: no memory object\n",
		    __func__, proc_getpid(p), p->p_comm);
		kr = KERN_FAILURE;
		goto done;
	}

	for (r = 0; r < region_count; ++r) {
		rp = &regions[r];

#if CONFIG_MACF
		vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
		error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
		    fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
		if (error) {
			printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
			    __func__, proc_getpid(p), p->p_comm, r, error);
			kr = KERN_FAILURE;
			goto done;
		}
#endif /* MAC */

		/* check that the mappings are properly covered by code signatures */
		if (cs_system_enforcement()) {
			if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
				printf("%s: [%d(%s)]: region %d, not code signed\n",
				    __func__, proc_getpid(p), p->p_comm, r);
				kr = KERN_FAILURE;
				goto done;
			}
		}
	}

	/* update the vnode's access time */
	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	/* get the VM to do the work */
	kr = vm_map_with_linking(proc_task(p), regions, region_count, &link_info, link_info_size, file_control);

done:
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
	}
	if (vp != NULL) {
		(void)vnode_put(vp);
	}
	if (regions != NULL) {
		kfree_data(regions, region_count * sizeof(regions[0]));
	}
	/* link_info is NULL here if the pager took ownership of it, i.e. things worked */
	if (link_info != NULL) {
		kfree_data(link_info, link_info_size);
	}

	switch (kr) {
	case KERN_SUCCESS:
		return 0;
	case KERN_RESOURCE_SHORTAGE:
		return ENOMEM;
	default:
		return EINVAL;
	}
}

#if DEBUG || DEVELOPMENT
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
#endif /* DEBUG || DEVELOPMENT */

/* sysctl overflow room */

SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");
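/*
 * For example, a userspace client might combine this with vm.page_free_count
 * (published further below) to compute how many pages it can consume before
 * dipping under the pageout daemon's free target (a sketch; error handling
 * elided):
 *
 *	#include <sys/sysctl.h>
 *
 *	unsigned int free_count, free_target;
 *	size_t len = sizeof(free_count);
 *	sysctlbyname("vm.page_free_count", &free_count, &len, NULL, 0);
 *	len = sizeof(free_target);
 *	sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0);
 *	unsigned int headroom_pages =
 *	    (free_count > free_target) ? free_count - free_target : 0;
 */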

SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_page_kern_lpage_count;
SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_kern_lpage_count, 0, "kernel used large pages");

SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SCALABLE_COUNTER_DECLARE(vm_page_grab_count_kern);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed_kern, vm_page_grab_count_kern, "Total pages grabbed (kernel)");
SCALABLE_COUNTER_DECLARE(vm_page_grab_count_iopl);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed_iopl, vm_page_grab_count_iopl, "Total pages grabbed (iopl)");
SCALABLE_COUNTER_DECLARE(vm_page_grab_count_upl);
SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed_upl, vm_page_grab_count_upl, "Total pages grabbed (upl)");


#if DEVELOPMENT || DEBUG
SCALABLE_COUNTER_DECLARE(vm_page_deactivate_behind_count);
SYSCTL_SCALABLE_COUNTER(_vm, pages_deactivated_behind, vm_page_deactivate_behind_count,
    "Number of pages deactivated behind");
#endif

#if DEVELOPMENT || DEBUG
#if __ARM_MIXED_PAGE_SIZE__
static int vm_mixed_pagesize_supported = 1;
#else
static int vm_mixed_pagesize_supported = 0;
#endif /*__ARM_MIXED_PAGE_SIZE__ */
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");

SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");

SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) */
SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
#endif /* DEVELOPMENT || DEBUG */

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
extern int madvise_free_debug_sometimes;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
    &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
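/*
 * These knobs exist because MADV_FREE normally leaves page contents in place
 * until reclaim, which can hide use-after-MADV_FREE bugs in userspace
 * allocators. A hedged sketch of the behavior they affect:
 *
 *	void *buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	memset(buf, 0xAA, sz);
 *	madvise(buf, sz, MADV_FREE);	// contents now undefined
 *	// with vm.madvise_free_debug=1, the pages are zero-filled right
 *	// away, so a buggy read here deterministically sees zeroes instead
 *	// of stale data that only disappears later under memory pressure.
 */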

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_reclaimed, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_nonwritable, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.free_shared, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");

/* pageout counts */
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");

SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
extern unsigned int vm_page_realtime_count;
SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
extern int vm_pageout_protect_realtime;
SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
extern int64_t vm_prefault_nb_no_page, vm_prefault_nb_wrong_page;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_no_page, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_no_page, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_wrong_page, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_wrong_page, "");

#if defined (__x86_64__)
extern unsigned int vm_clump_promote_threshold;
SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
#if DEVELOPMENT || DEBUG
extern unsigned long vm_clump_stats[];
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
#endif /* if DEVELOPMENT || DEBUG */
#endif /* #if defined (__x86_64__) */

#if CONFIG_SECLUDED_MEMORY

SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
extern unsigned int vm_page_secluded_target;
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_free;
extern unsigned int vm_page_secluded_count_inuse;
extern unsigned int vm_page_secluded_count_over_target;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");

extern struct vm_page_secluded_data vm_page_secluded;
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");

#endif /* CONFIG_SECLUDED_MEMORY */

#if CONFIG_DEFERRED_RECLAIM
#pragma mark Deferred Reclaim
SYSCTL_NODE(_vm, OID_AUTO, reclaim, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Deferred Memory Reclamation");
#if DEVELOPMENT || DEBUG
/*
 * VM reclaim testing
 */
extern bool vm_deferred_reclamation_block_until_task_has_been_reclaimed(task_t task);

static int
sysctl_vm_reclaim_wait_for_pid SYSCTL_HANDLER_ARGS
{
	int error = EINVAL, pid = 0;
	/*
	 * Only send on write
	 */
	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr) {
		return error;
	}
	if (pid <= 0) {
		return EINVAL;
	}
	proc_t p = proc_find(pid);
	if (p == PROC_NULL) {
		return ESRCH;
	}
	task_t t = proc_task(p);
	if (t == TASK_NULL) {
		proc_rele(p);
		return ESRCH;
	}
	task_reference(t);
	proc_rele(p);

	bool success = vm_deferred_reclamation_block_until_task_has_been_reclaimed(t);
	if (success) {
		error = 0;
	}
	task_deallocate(t);

	return error;
}

SYSCTL_PROC(_vm_reclaim, OID_AUTO, wait_for_pid,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
    &sysctl_vm_reclaim_wait_for_pid, "I",
    "Block until the given pid has been drained by kernel GC");

static int
sysctl_vm_reclaim_drain_pid SYSCTL_HANDLER_ARGS
{
	int error = EINVAL;
	kern_return_t kr;
	pid_t pid;
	error = sysctl_handle_int(oidp, &pid, 0, req);
	/* Only reclaim on write */
	if (error || !req->newptr) {
		return error;
	}
	if (pid <= 0) {
		return EINVAL;
	}
	proc_t p = proc_find(pid);
	if (p == PROC_NULL) {
		return ESRCH;
	}
	task_t t = proc_task(p);
	if (t == TASK_NULL) {
		proc_rele(p);
		return ESRCH;
	}
	task_reference(t);
	proc_rele(p);
	kr = vm_deferred_reclamation_task_drain(t, RECLAIM_OPTIONS_NONE);
	task_deallocate(t);
	return mach_to_bsd_errno(kr);
}

SYSCTL_PROC(_vm_reclaim, OID_AUTO, drain_pid,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
    &sysctl_vm_reclaim_drain_pid, "I",
    "Drain the deferred reclamation buffer for a pid");

static int
proc_filter_reclaimable(proc_t p, __unused void *arg)
{
	task_t task = proc_task(p);
	return vm_deferred_reclamation_task_has_ring(task);
}

static int
proc_reclaim_drain(proc_t p, __unused void *arg)
{
	kern_return_t kr;
	task_t task = proc_task(p);
	kr = vm_deferred_reclamation_task_drain(task, RECLAIM_OPTIONS_NONE);
	return mach_to_bsd_errno(kr);
}

static int
sysctl_vm_reclaim_drain_all SYSCTL_HANDLER_ARGS
{
	int error;
	int val;
	if (!req->newptr) {
		return EINVAL;
	}
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || val == FALSE) {
		return error;
	}
	proc_iterate(PROC_ALLPROCLIST, proc_reclaim_drain, NULL,
	    proc_filter_reclaimable, NULL);
	return 0;
}

SYSCTL_PROC(_vm_reclaim, OID_AUTO, drain_all,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
    &sysctl_vm_reclaim_drain_all, "I",
    "Fully reclaim from every deferred reclamation buffer on the system");

extern uint32_t vm_reclaim_buffer_count;
extern uint64_t vm_reclaim_gc_epoch;
extern uint64_t vm_reclaim_gc_reclaim_count;
extern uint64_t vm_reclaim_sampling_period_abs;
extern uint64_t vm_reclaim_sampling_period_ns;
extern bool vm_reclaim_debug;
extern bool vm_reclaim_enabled;
extern uint32_t vm_reclaim_autotrim_pct_normal;
extern uint32_t vm_reclaim_autotrim_pct_pressure;
extern uint32_t vm_reclaim_autotrim_pct_critical;
extern uint32_t vm_reclaim_wma_weight_base;
extern uint32_t vm_reclaim_wma_weight_cur;
extern uint32_t vm_reclaim_wma_denom;
extern uint64_t vm_reclaim_abandonment_threshold;

SYSCTL_UINT(_vm_reclaim, OID_AUTO, reclaim_buffer_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, (uint32_t *)&vm_reclaim_buffer_count, 0,
    "The number of deferred memory buffers currently alive");
SYSCTL_QUAD(_vm_reclaim, OID_AUTO, reclaim_gc_epoch,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_gc_epoch,
    "Number of times the global GC thread has run");
SYSCTL_QUAD(_vm_reclaim, OID_AUTO, reclaim_gc_reclaim_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_gc_reclaim_count,
    "Number of times the global GC thread has reclaimed from a buffer");
SYSCTL_COMPAT_UINT(_vm_reclaim, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_debug, 0,
    "Debug logs for vm.reclaim");
SYSCTL_COMPAT_UINT(_vm_reclaim, OID_AUTO, enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_enabled, 0,
    "Whether deferred memory reclamation is enabled on this system");
SYSCTL_UINT(_vm_reclaim, OID_AUTO, autotrim_pct_normal,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_autotrim_pct_normal, 0,
    "Percentage of a task's lifetime max phys_footprint that must be reclaimable "
    "to engage auto-trim when the system is operating normally");
SYSCTL_UINT(_vm_reclaim, OID_AUTO, autotrim_pct_pressure,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_autotrim_pct_pressure, 0,
    "Percentage of a task's lifetime max phys_footprint that must be reclaimable "
    "to engage auto-trim when the system is under memory pressure");
SYSCTL_UINT(_vm_reclaim, OID_AUTO, autotrim_pct_critical,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_autotrim_pct_critical, 0,
    "Percentage of a task's lifetime max phys_footprint that must be reclaimable "
    "to engage auto-trim when the system is under critical memory pressure");
SYSCTL_UINT(_vm_reclaim, OID_AUTO, wma_weight_base,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_wma_weight_base, 0,
    "Weight applied to historical minimum buffer size samples");
SYSCTL_UINT(_vm_reclaim, OID_AUTO, wma_weight_cur,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_wma_weight_cur, 0,
    "Weight applied to current sampled minimum buffer size");
SYSCTL_UINT(_vm_reclaim, OID_AUTO, wma_denom,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_wma_denom, 0,
    "Denominator for weighted moving average calculation");
SYSCTL_QUAD(_vm_reclaim, OID_AUTO, abandonment_threshold,
    CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_abandonment_threshold,
    "The number of sampling periods between accounting updates that may elapse "
    "before the buffer is considered \"abandoned\"");

static int
sysctl_vm_reclaim_sampling_period SYSCTL_HANDLER_ARGS
{
	uint64_t new_val_ns;
	uint64_t old_val_ns = vm_reclaim_sampling_period_ns;
	int err = sysctl_io_number(req, vm_reclaim_sampling_period_ns,
	    sizeof(vm_reclaim_sampling_period_ns), &new_val_ns, NULL);
	if (err || !req->newptr) {
		return err;
	}
	if (new_val_ns != old_val_ns) {
		vm_reclaim_sampling_period_ns = new_val_ns;
		nanoseconds_to_absolutetime(vm_reclaim_sampling_period_ns, &vm_reclaim_sampling_period_abs);
	}
	return 0;
}

SYSCTL_PROC(_vm_reclaim, OID_AUTO, sampling_period_ns,
    CTLFLAG_RW | CTLTYPE_QUAD | CTLFLAG_LOCKED, NULL, 0, sysctl_vm_reclaim_sampling_period, "QU",
    "Interval (nanoseconds) at which to sample the minimum buffer size and "
    "consider trimming excess");
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_DEFERRED_RECLAIM */

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = current_uthread();

	/*
	 * If this thread is marked as a low-priority I/O type and the I/O
	 * it issued during this cleaning operation collided with normal
	 * I/O operations, delay in order to mitigate this task's impact
	 * on the normal operation of the system.
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t kr;
	uint32_t pages_reclaimed;
	uint32_t pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
		    uap->pages_reclaimed,
		    sizeof(pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}
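/*
 * Userspace view of the syscall above (a sketch; the wrapper shown is
 * illustrative, not a quoted prototype):
 *
 *	uint32_t reclaimed = 0;
 *	// blocks until there is pressure when wait_for_pressure is set,
 *	// reporting on the last nsecs_monitored nanoseconds of activity
 *	int wanted = vm_pressure_monitor(1, nsecs, &reclaimed);
 *	// the return value is the number of pages the pageout daemon still
 *	// wants; reclaimed is how many pages were reclaimed in the window.
 */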

int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifndef CONFIG_KAS_INFO
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* CONFIG_KAS_INFO */
	int selector = uap->selector;
	user_addr_t valuep = uap->value;
	user_addr_t sizep = uap->size;
	user_size_t size, rsize;
	int error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
		rsize = size;
	}
	break;
	case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
	{
		uint32_t i;
		kernel_mach_header_t *mh = &_mh_execute_header;
		struct load_command *cmd;
		cmd = (struct load_command*) &mh[1];
		uint64_t *bases;
		rsize = mh->ncmds * sizeof(uint64_t);

		/*
		 * Return the size if no data was passed
		 */
		if (valuep == 0) {
			break;
		}

		if (rsize > size) {
			return EINVAL;
		}

		bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);

		for (i = 0; i < mh->ncmds; i++) {
			if (cmd->cmd == LC_SEGMENT_KERNEL) {
				__IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
				bases[i] = (uint64_t)sg->vmaddr;
			}
			cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
		}

		error = copyout(bases, valuep, rsize);

		kfree_data(bases, rsize);

		if (error) {
			return error;
		}
	}
	break;
	case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR:
	case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR:
	{
#if CONFIG_SPTM
		const uint64_t slide =
		    (selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide;
#else
		const uint64_t slide = 0;
#endif

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
		rsize = size;
	}
	break;
	default:
		return EINVAL;
	}

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64 = (user64_size_t)rsize;
		error = copyout(&size64, sizep, sizeof(size64));
	} else {
		user32_size_t size32 = (user32_size_t)rsize;
		error = copyout(&size32, sizep, sizeof(size32));
	}

	return error;
#endif /* CONFIG_KAS_INFO */
}
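/*
 * Example retrieval of the kernel text slide from userspace (a sketch;
 * requires root, and a kernel built with CONFIG_KAS_INFO):
 *
 *	#include <sys/kas_info.h>
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0) {
 *		printf("kernel text slide: 0x%llx\n", slide);
 *	}
 */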

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#pragma clang diagnostic ignored "-Wunused-function"

static void
asserts()
{
	static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
	static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
}

SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
#pragma clang diagnostic pop

extern uint32_t vm_page_pages;
SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");

extern uint32_t vm_page_busy_absent_skipped;
SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");

extern uint32_t vm_page_upl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");

extern uint32_t vm_page_iopl_tainted;
SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");

#if __arm64__ && (DEVELOPMENT || DEBUG)
extern int vm_footprint_suspend_allowed;
SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");

extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
static int
sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int new_value;

	if (req->newptr == USER_ADDR_NULL) {
		return 0;
	}
	error = SYSCTL_IN(req, &new_value, sizeof(int));
	if (error) {
		return error;
	}
	if (!vm_footprint_suspend_allowed) {
		if (new_value != 0) {
			/* suspends are not allowed... */
			return 0;
		}
		/* ... but let resumes proceed */
	}
	DTRACE_VM2(footprint_suspend,
	    vm_map_t, current_map(),
	    int, new_value);

	pmap_footprint_suspend(current_map(), new_value);

	return 0;
}
SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_vm_footprint_suspend, "I", "");
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */

extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");

#if CODE_SIGNING_MONITOR
extern uint64_t vm_cs_defer_to_csm;
extern uint64_t vm_cs_defer_to_csm_not;
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
#endif /* CODE_SIGNING_MONITOR */

extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
    CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
extern int shared_region_destroy_delay;
SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");

#if MACH_ASSERT
extern int pmap_ledgers_panic_leeway;
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */


extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
    CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
3204 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
3205 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
3206 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
3207 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
3208 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
3209 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
3210 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
3211 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
3212 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
3213 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
3214 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
3215 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
3216 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
3217 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
3218 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
3219 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
3220 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
3221 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
3222 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
3223 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
3224 SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
3225 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3226
3227 extern int vm_protect_privileged_from_untrusted;
3228 SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
3229 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
3230 extern uint64_t vm_copied_on_read;
3231 extern uint64_t vm_copied_on_read_kernel_map;
3232 extern uint64_t vm_copied_on_read_platform_map;
3233 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
3234 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
3235 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read_kernel_map,
3236 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read_kernel_map, "");
3237 SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read_platform_map,
3238 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read_platform_map, "");
3239
3240 extern int vm_shared_region_count;
3241 extern int vm_shared_region_peak;
3242 SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
3243 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
3244 SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
3245 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
3246 #if DEVELOPMENT || DEBUG
3247 extern unsigned int shared_region_pagers_resident_count;
3248 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
3249 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
3250 extern unsigned int shared_region_pagers_resident_peak;
3251 SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
3252 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
3253 extern int shared_region_pager_count;
3254 SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
3255 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
3256 #if __has_feature(ptrauth_calls)
3257 extern int shared_region_key_count;
3258 SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
3259 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
3260 extern int vm_shared_region_reslide_count;
3261 SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
3262 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
3263 #endif /* __has_feature(ptrauth_calls) */
3264 #endif /* DEVELOPMENT || DEBUG */
3265
3266 #if MACH_ASSERT
3267 extern int debug4k_filter;
3268 SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
3269 extern int debug4k_panic_on_terminate;
3270 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
3271 extern int debug4k_panic_on_exception;
3272 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
3273 extern int debug4k_panic_on_misaligned_sharing;
3274 SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
3275 #endif /* MACH_ASSERT */
3276
3277 extern uint64_t vm_map_set_size_limit_count;
3278 extern uint64_t vm_map_set_data_limit_count;
3279 extern uint64_t vm_map_enter_RLIMIT_AS_count;
3280 extern uint64_t vm_map_enter_RLIMIT_DATA_count;
3281 SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
3282 SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
3283 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
3284 SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
3285
3286 extern uint64_t vm_map_copy_extra_adjustments;
3287 SYSCTL_QUAD(_vm, OID_AUTO, map_copy_extra_adjustments, CTLFLAG_RD | CTLFLAG_LOCKED,
3288 &vm_map_copy_extra_adjustments, "");
3289
3290 extern uint64_t vm_fault_resilient_media_initiate;
3291 extern uint64_t vm_fault_resilient_media_retry;
3292 extern uint64_t vm_fault_resilient_media_proceed;
3293 extern uint64_t vm_fault_resilient_media_release;
3294 extern uint64_t vm_fault_resilient_media_abort1;
3295 extern uint64_t vm_fault_resilient_media_abort2;
3296 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
3297 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
3298 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
3299 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
3300 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
3301 SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
3302 #if MACH_ASSERT
3303 extern int vm_fault_resilient_media_inject_error1_rate;
3304 extern int vm_fault_resilient_media_inject_error1;
3305 extern int vm_fault_resilient_media_inject_error2_rate;
3306 extern int vm_fault_resilient_media_inject_error2;
3307 extern int vm_fault_resilient_media_inject_error3_rate;
3308 extern int vm_fault_resilient_media_inject_error3;
3309 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
3310 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
3311 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
3312 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
3313 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
3314 SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
3315 #endif /* MACH_ASSERT */
3316
3317 extern uint64_t pmap_query_page_info_retries;
3318 SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
3319
3320 /*
3321 * A sysctl which causes all existing shared regions to become stale. They
3322 * will no longer be used by anything new and will be torn down as soon as
3323 * the last existing user exits. Any successful write causes that to happen.
3324 * This should only be used by launchd, so we check that this is initproc.
3325 */
3326 static int
3327 shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3328 {
3329 unsigned int value = 0;
3330 int changed = 0;
3331 int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
3332 if (error || !changed) {
3333 return error;
3334 }
3335 if (current_proc() != initproc) {
3336 return EPERM;
3337 }
3338
3339 vm_shared_region_pivot();
3340
3341 return 0;
3342 }
3343
3344 SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
3345 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
3346 0, 0, shared_region_pivot, "I", "");
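/*
 * Illustrative usage sketch (userspace): only initproc (launchd) may
 * trigger a pivot; any other writer gets EPERM.
 *
 *	int one = 1;
 *	if (sysctlbyname("vm.shared_region_pivot", NULL, NULL, &one, sizeof(one)) == -1) {
 *		// errno is EPERM unless the caller is initproc
 *	}
 */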
3347
3348 extern uint64_t vm_object_shadow_forced;
3349 extern uint64_t vm_object_shadow_skipped;
3350 SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
3351 &vm_object_shadow_forced, "");
3352 SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
3353 &vm_object_shadow_skipped, "");
3354
3355 extern uint64_t vm_object_upl_throttle_cnt;
3356 SYSCTL_QUAD(_vm, OID_AUTO, object_upl_throttle_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
3357 &vm_object_upl_throttle_cnt,
3358 "The number of times in which a UPL write was throttled due to pageout starvation");
3359
3360 #if HAS_MTE
3361 #pragma mark MTE
3362
3363 SYSCTL_NODE(_vm, OID_AUTO, mte, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "mte");
3364
3365 /* sysctls for vm.mte.* counters. */
3366
3367 SYSCTL_UINT(_vm_mte, OID_AUTO, tagged, CTLFLAG_RD,
3368 &vm_page_tagged_count, 0, "tagged pages in use");
3369
3370 SYSCTL_QUAD(_vm_mte, OID_AUTO, refill_thread_wakeups, CTLFLAG_RD,
3371 &vm_mte_refill_thread_wakeups,
3372 "the number of times the refill thread was woken up");
3373
3374 /* sysctls for vm.mte.free.* counters. */
3375
3376 SYSCTL_NODE(_vm_mte, OID_AUTO, free, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "free counts");
3377
3378 SYSCTL_UINT(_vm_mte_free, OID_AUTO, total, CTLFLAG_RD,
3379 &vm_page_free_count, 0,
3380 "total free pages (same as vm.page_free_count)");
3381 SYSCTL_UINT(_vm_mte_free, OID_AUTO, taggable, CTLFLAG_RD,
3382 &vm_page_free_taggable_count, 0,
3383 "free taggable pages in the MTE free queue");
3384 SYSCTL_UINT(_vm_mte_free, OID_AUTO, claimable, CTLFLAG_RD,
3385 &mte_claimable_queue.vmpfq_count, 0,
3386 "free tag storage pages on the MTE claimable queue");
3387
3388 SYSCTL_SCALABLE_COUNTER(_vm_mte_free, cpu_untagged, vm_cpu_free_count,
3389 "free untagged pages in CPU lists");
3390 SYSCTL_SCALABLE_COUNTER(_vm_mte_free, cpu_claimed, vm_cpu_free_claimed_count,
3391 "free claimed pages in CPU lists");
3392 SYSCTL_SCALABLE_COUNTER(_vm_mte_free, cpu_tagged, vm_cpu_free_tagged_count,
3393 "free tagged pages in CPU lists");
3394
3395 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_untaggable_0, CTLFLAG_RD,
3396 &mte_free_queues[MTE_FREE_UNTAGGABLE_0].vmpfq_count, 0,
3397 "disabled/pinned/deactivating/claimed (with 16 free pages or less) tag storage pages")
3398 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_untaggable_1, CTLFLAG_RD,
3399 &mte_free_queues[MTE_FREE_UNTAGGABLE_1].vmpfq_count, 0,
3400 "claimed (with 17 free pages or more) or disabled (with 16 pages or less) tag storage pages")
3401 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_untaggable_2, CTLFLAG_RD,
3402 &mte_free_queues[MTE_FREE_UNTAGGABLE_2].vmpfq_count, 0,
3403 "disabled (with 17 pages or more) tag storage pages")
3404 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_active_0, CTLFLAG_RD,
3405 &mte_free_queues[MTE_FREE_ACTIVE_0].vmpfq_count, 0,
3406 "active tag storages with free covered pages (bucket 0)");
3407 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_active_1, CTLFLAG_RD,
3408 &mte_free_queues[MTE_FREE_ACTIVE_1].vmpfq_count, 0,
3409 "active tag storages with free covered pages (bucket 1)");
3410 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_active_2, CTLFLAG_RD,
3411 &mte_free_queues[MTE_FREE_ACTIVE_2].vmpfq_count, 0,
3412 "active tag storages with free covered pages (bucket 2)");
3413 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_active_3, CTLFLAG_RD,
3414 &mte_free_queues[MTE_FREE_ACTIVE_3].vmpfq_count, 0,
3415 "active tag storages with free covered pages (bucket 3)");
3416 SYSCTL_UINT(_vm_mte_free, OID_AUTO, tag_storage_untaggable_activating, CTLFLAG_RD,
3417 &mte_free_queues[MTE_FREE_UNTAGGABLE_ACTIVATING].vmpfq_count, 0,
3418 "activating/reclaiming tag storages with free covered pages");
3419
3420 /* sysctls for vm.mte.cell.* counters. */
3421
3422 SYSCTL_NODE(_vm_mte, OID_AUTO, cell, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "mte cell");
3423
3424 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, disabled, CTLFLAG_RD,
3425 &mte_info_lists[MTE_LIST_DISABLED_IDX].count, 0,
3426 "free inactive tag storage pages");
3427 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, disabled_recursive, CTLFLAG_RD,
3428 &vm_page_recursive_tag_storage_count, 0,
3429 "recursive tag storage pages");
3430 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, disabled_unmanaged, CTLFLAG_RD,
3431 &vm_page_unmanaged_tag_storage_count, 0,
3432 "unmanaged tag storage pages");
3433 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, retired, CTLFLAG_RD,
3434 &vm_page_retired_tag_storage_count, 0,
3435 "retired tag storage pages");
3436 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, pinned, CTLFLAG_RD,
3437 &mte_info_lists[MTE_LIST_PINNED_IDX].count, 0,
3438 "unreclaimable tag storage pages");
3439 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, deactivating, CTLFLAG_RD,
3440 &mte_info_lists[MTE_LIST_DEACTIVATING_IDX].count, 0,
3441 "deactivating tag storage pages");
3442 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, claimed, CTLFLAG_RD,
3443 &mte_info_lists[MTE_LIST_CLAIMED_IDX].count, 0,
3444 "claimed tag storage pages");
3445 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, inactive, CTLFLAG_RD,
3446 &mte_info_lists[MTE_LIST_INACTIVE_IDX].count, 0,
3447 "free inactive tag storage pages");
3448 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, reclaiming, CTLFLAG_RD,
3449 &mte_info_lists[MTE_LIST_RECLAIMING_IDX].count, 0,
3450 "reclaiming tag storage pages");
3451 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, activating, CTLFLAG_RD,
3452 &mte_info_lists[MTE_LIST_ACTIVATING_IDX].count, 0,
3453 "activating tag storage pages");
3454 SYSCTL_UINT(_vm_mte_cell, OID_AUTO, active_0, CTLFLAG_RD,
3455 &mte_info_lists[MTE_LIST_ACTIVE_0_IDX].count, 0,
3456 "active tag storage pages with no used page tagged");
3457 static int
3458 tag_storage_active SYSCTL_HANDLER_ARGS
3459 {
3460 #pragma unused(arg1, arg2, oidp)
3461 uint32_t value = mteinfo_tag_storage_active(false);
3462
3463 return SYSCTL_OUT(req, &value, sizeof(value));
3464 }
3465 SYSCTL_PROC(_vm_mte_cell, OID_AUTO, active,
3466 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3467 0, 0, &tag_storage_active, "I",
3468 "active tag storage pages");
3469
3470 /* sysctls for vm.mte.tag_storage.* counters. */
3471
3472 SYSCTL_NODE(_vm_mte, OID_AUTO, tag_storage, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "mte tag storage");
3473
3474 SYSCTL_UINT(_vm_mte_tag_storage, OID_AUTO, reserved, CTLFLAG_RD,
3475 &vm_page_tag_storage_reserved, 0,
3476 "free tag storage pages reserve");
3477 SYSCTL_UINT(_vm_mte_tag_storage, OID_AUTO, wired, CTLFLAG_RD,
3478 &vm_page_wired_tag_storage_count, 0,
3479 "wired tag storage pages");
3480 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, activations, CTLFLAG_RD,
3481 &vm_page_tag_storage_activation_count,
3482 "tag storage activations (inactive/claimed -> active)");
3483 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, deactivations, CTLFLAG_RD,
3484 &vm_page_tag_storage_deactivation_count,
3485 "tag storage deactivations (active -> inactive)");
3486 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, reclaims, CTLFLAG_RD,
3487 &vm_page_tag_storage_reclaim_success_count,
3488 "successful tag storage reclamations");
3489 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, reclaims_from_cpu, CTLFLAG_RD,
3490 &vm_page_tag_storage_reclaim_from_cpu_count,
3491 "successful tag storage reclamations from the cpu free lists");
3492 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, reclaim_failures, CTLFLAG_RD,
3493 &vm_page_tag_storage_reclaim_failure_count,
3494 "failed tag storage reclamations");
3495 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, reclaim_wired_failures, CTLFLAG_RD,
3496 &vm_page_tag_storage_reclaim_wired_failure_count,
3497 "failed tag storage reclamations due to tag storage being wired");
3498 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, wire_relocations, CTLFLAG_RD,
3499 &vm_page_tag_storage_wire_relocation_count,
3500 "tag storage relocations due to wiring");
3501 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, reclaim_compressor_failures, CTLFLAG_RD,
3502 &vm_page_tag_storage_reclaim_compressor_failure_count,
3503 "failed tag storage reclamations due to tag storage used in compressor pool");
3504 SYSCTL_QUAD(_vm_mte_tag_storage, OID_AUTO, compressor_relocations, CTLFLAG_RD,
3505 &vm_page_tag_storage_compressor_relocation_count,
3506 "tag storage relocations due to compressor pool");
3507 SYSCTL_UINT(_vm_mte_tag_storage, OID_AUTO, free_unmanaged, CTLFLAG_RD,
3508 &vm_page_free_unmanaged_tag_storage_count, 0,
3509 "number of free unmanaged tag storage pages");
3510
3511 SYSCTL_SCALABLE_COUNTER(_vm_mte_tag_storage, cpu_allocated_claimed,
3512 vm_cpu_claimed_count, "claimed tag storage pages allocated");
3513
3514 static int
3515 tag_storage_fragmentation SYSCTL_HANDLER_ARGS
3516 {
3517 #pragma unused(arg1, arg2, oidp)
3518 uint32_t value = mteinfo_tag_storage_fragmentation(false);
3519
3520 return SYSCTL_OUT(req, &value, sizeof(value));
3521 }
3522 SYSCTL_PROC(_vm_mte_tag_storage, OID_AUTO, fragmentation,
3523 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3524 0, 0, &tag_storage_fragmentation, "I",
3525 "the achievable the fragmentation of the tag storage space (in parts per thousand)");
3526
3527 static int
3528 tag_storage_fragmentation_actual SYSCTL_HANDLER_ARGS
3529 {
3530 #pragma unused(arg1, arg2, oidp)
3531 uint32_t value = mteinfo_tag_storage_fragmentation(true);
3532
3533 return SYSCTL_OUT(req, &value, sizeof(value));
3534 }
3535 SYSCTL_PROC(_vm_mte_tag_storage, OID_AUTO, fragmentation_actual,
3536 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3537 0, 0, &tag_storage_fragmentation_actual, "I",
3538 "the actual the fragmentation of the tag storage space (in parts per thousand)");
3539
3540 /* sysctls for vm.mte.compressor_* */
3541
3542 extern unsigned int vm_object_no_compressor_pager_for_mte_count;
3543 SYSCTL_INT(_vm_mte, OID_AUTO, no_compressor_pager_for_mte, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_no_compressor_pager_for_mte_count, 0, "");
3544
3545 /* sysctls for MTE compression stats */
3546
3547 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_pages_compressed, compressor_tagged_pages_compressed, "");
3548 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_pages_decompressed, compressor_tagged_pages_decompressed, "");
3549 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_pages_freed, compressor_tagged_pages_freed, "");
3550 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_pages_corrupted, compressor_tagged_pages_corrupted, "");
3551 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_overhead_bytes, compressor_tags_overhead_bytes, "");
3552 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_pages, compressor_tagged_pages, "");
3553 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_ts_pages_used, compressor_tag_storage_pages_in_pool,
3554 "the number of tag storage pages used in the compressor");
3555 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_non_ts_pages_used, compressor_non_tag_storage_pages_in_pool,
3556 "the number of non-tag storage pages used in the compressor");
3557 #if DEVELOPMENT || DEBUG
3558 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_all_zero, compressor_tags_all_zero, "");
3559 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_same_value, compressor_tags_same_value, "");
3560 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_below_align, compressor_tags_below_align, "");
3561 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_above_align, compressor_tags_above_align, "");
3562 SYSCTL_SCALABLE_COUNTER(_vm_mte, compress_incompressible, compressor_tags_incompressible, "");
3563 #endif /* DEVELOPMENT || DEBUG */
3564
3565 #endif /* HAS_MTE */
3566
3567 SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
3568 &vmtc_total, 0, "total text page corruptions detected");
3569
3570
3571 #if DEBUG || DEVELOPMENT
3572 /*
3573 * A sysctl that can be used to corrupt a text page with an illegal instruction.
3574 * Used for testing text page self healing.
3575 */
3576 extern kern_return_t vm_corrupt_text_addr(uintptr_t);
3577 static int
3578 corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3579 {
3580 uint64_t value = 0;
3581 int error = sysctl_handle_quad(oidp, &value, 0, req);
3582 if (error || !req->newptr) {
3583 return error;
3584 }
3585
3586 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
3587 return 0;
3588 } else {
3589 return EINVAL;
3590 }
3591 }
3592
3593 SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
3594 CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3595 0, 0, corrupt_text_addr, "-", "");
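/*
 * Illustrative usage sketch (userspace, DEBUG/DEVELOPMENT kernels): a text
 * self-healing test writes the target address as a 64-bit quantity. The
 * target shown is hypothetical; it must fall within an executable text page.
 *
 *	uint64_t addr = (uint64_t)(uintptr_t)&some_function; // hypothetical target
 *	sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, sizeof(addr));
 */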
3596 #endif /* DEBUG || DEVELOPMENT */
3597
3598 #if CONFIG_MAP_RANGES
3599 /*
3600 * vm.malloc_ranges
3601 *
3602 * space-separated list of <left:right> hexadecimal addresses.
3603 */
3604 static int
3605 vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
3606 {
3607 vm_map_t map = current_map();
3608 struct mach_vm_range r1, r2;
3609 char str[20 * 4];
3610 int len;
3611 mach_vm_offset_t right_hole_max;
3612
3613 if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
3614 return ENOENT;
3615 }
3616 if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
3617 return ENOENT;
3618 }
3619
3620 #if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
3621 right_hole_max = MACH_VM_JUMBO_ADDRESS;
3622 #else /* !XNU_TARGET_OS_IOS || !EXTENDED_USER_VA_SUPPORT */
3623 right_hole_max = get_map_max(map);
3624 #endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */
3625
3626 len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
3627 r1.max_address, r2.min_address,
3628 r2.max_address, right_hole_max);
3629
3630 return SYSCTL_OUT(req, str, len);
3631 }
3632
3633 SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
3634 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3635 0, 0, &vm_map_malloc_ranges, "A", "");
3636
3637 #if DEBUG || DEVELOPMENT
3638 static int
3639 vm_map_user_range_default SYSCTL_HANDLER_ARGS
3640 {
3641 #pragma unused(arg1, arg2, oidp)
3642 struct mach_vm_range range;
3643
3644 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
3645 != KERN_SUCCESS) {
3646 return EINVAL;
3647 }
3648
3649 return SYSCTL_OUT(req, &range, sizeof(range));
3650 }
3651
3652 static int
3653 vm_map_user_range_heap SYSCTL_HANDLER_ARGS
3654 {
3655 #pragma unused(arg1, arg2, oidp)
3656 struct mach_vm_range range;
3657
3658 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
3659 != KERN_SUCCESS) {
3660 return EINVAL;
3661 }
3662
3663 return SYSCTL_OUT(req, &range, sizeof(range));
3664 }
3665
3666 static int
3667 vm_map_user_range_large_file SYSCTL_HANDLER_ARGS
3668 {
3669 #pragma unused(arg1, arg2, oidp)
3670 struct mach_vm_range range;
3671
3672 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_LARGE_FILE, &range)
3673 != KERN_SUCCESS) {
3674 return EINVAL;
3675 }
3676
3677 return SYSCTL_OUT(req, &range, sizeof(range));
3678 }
3679
3680 /*
3681 * A sysctl that can be used to return ranges for the current VM map.
3682 * Used for testing VM ranges.
3683 */
3684 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3685 0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
3686 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3687 0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
3688 SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_large_file, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3689 0, 0, &vm_map_user_range_large_file, "S,mach_vm_range", "");
3690
3691 #endif /* DEBUG || DEVELOPMENT */
3692 #endif /* CONFIG_MAP_RANGES */
3693
3696
3697 extern uint64_t vm_map_range_overflows_count;
3698 SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
3699 extern boolean_t vm_map_range_overflows_log;
3700 SYSCTL_INT(_vm, OID_AUTO, map_range_overflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");
3701
3702 extern uint64_t c_seg_filled_no_contention;
3703 extern uint64_t c_seg_filled_contention;
3704 extern clock_sec_t c_seg_filled_contention_sec_max;
3705 extern clock_nsec_t c_seg_filled_contention_nsec_max;
3706 SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
3707 SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
3708 SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
3709 SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
3710 #if (XNU_TARGET_OS_OSX && __arm64__)
3711 extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
3712 extern int c_process_major_yield_after; /* yield after moving ? segments */
3713 extern uint64_t c_process_major_reports;
3714 extern clock_sec_t c_process_major_max_sec;
3715 extern clock_nsec_t c_process_major_max_nsec;
3716 extern uint32_t c_process_major_peak_segcount;
3717 SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
3718 SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
3719 SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
3720 SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
3721 SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
3722 SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
3723 #endif /* (XNU_TARGET_OS_OSX && __arm64__) */
3724
3725 #if DEVELOPMENT || DEBUG
3726 extern int panic_object_not_alive;
3727 SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
3728 #endif /* DEVELOPMENT || DEBUG */
3729
3730 #if FBDP_DEBUG_OBJECT_NO_PAGER
3731 extern int fbdp_no_panic;
3732 SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
3733 #endif /* FBDP_DEBUG_OBJECT_NO_PAGER */
3734
3735 extern uint64_t cluster_direct_write_wired;
3736 SYSCTL_QUAD(_vm, OID_AUTO, cluster_direct_write_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &cluster_direct_write_wired, "");
3737
3738 extern uint64_t vm_object_pageout_not_on_queue;
3739 extern uint64_t vm_object_pageout_not_pageable;
3740 extern uint64_t vm_object_pageout_pageable;
3741 extern uint64_t vm_object_pageout_active_local;
3742 SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_not_on_queue, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_not_on_queue, "");
3743 SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_not_pageable, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_not_pageable, "");
3744 SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_pageable, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_pageable, "");
3745 SYSCTL_QUAD(_vm, OID_AUTO, object_pageout_active_local, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_object_pageout_active_local, "");
3746
3747
3748 #if DEVELOPMENT || DEBUG
3749
3750 static uint32_t
3751 sysctl_compressor_seg_magic(vm_c_serialize_add_data_t with_data)
3752 {
3753 #if HAS_MTE
3754 if (with_data == VM_C_SERIALIZE_DATA_TAGS) {
3755 return VM_C_SEGMENT_INFO_MAGIC_WITH_TAGS;
3756 }
3757 #else
3758 #pragma unused(with_data)
3759 #endif /* HAS_MTE */
3760 return VM_C_SEGMENT_INFO_MAGIC;
3761 }
3762
3763 /* The largest possible single segment + its slots is
3764 * (sizeof(c_segment_info) + C_SLOT_MAX_INDEX * sizeof(c_slot_info)) + (data of a single segment) */
3765 #define SYSCTL_SEG_BUF_SIZE (8 * 1024 + 64 * 1024)
3766
3767 extern uint32_t c_segments_available;
3768
3769 struct sysctl_buf_header {
3770 uint32_t magic;
3771 } __attribute__((packed));
3772
3773 /* This sysctl iterates over the populated c_segments and writes some info about each one and its slots.
3774 * Rather than doing everything here, it delegates the per-segment serialization to a helper in vm_compressor.c. */
3775 static int
3776 sysctl_compressor_segments_stream(struct sysctl_req *req, vm_c_serialize_add_data_t with_data)
3777 {
3778 char* buf = kalloc_data(SYSCTL_SEG_BUF_SIZE, Z_WAITOK | Z_ZERO);
3779 if (!buf) {
3780 return ENOMEM;
3781 }
3782 size_t offset = 0;
3783 int error = 0;
3784 int segno = 0;
3785 /* 4-byte header identifying the version of the data format.
3786 * This should be incremented if c_segment_info or c_slot_info changes */
3787 ((struct sysctl_buf_header*)buf)->magic = sysctl_compressor_seg_magic(with_data);
3788 offset += sizeof(uint32_t);
3789
3790 while (segno < c_segments_available) {
3791 size_t left_sz = SYSCTL_SEG_BUF_SIZE - offset;
3792 kern_return_t kr = vm_compressor_serialize_segment_debug_info(segno, buf + offset, &left_sz, with_data);
3793 if (kr == KERN_NO_SPACE) {
3794 /* failed to add another segment, push the current buffer out and try again */
3795 if (offset == 0) {
3796 error = EINVAL; /* no space to write even though the buffer was empty; shouldn't happen */
3797 goto out;
3798 }
3799 /* write out chunk */
3800 error = SYSCTL_OUT(req, buf, offset);
3801 if (error) {
3802 goto out;
3803 }
3804 offset = 0;
3805 bzero(buf, SYSCTL_SEG_BUF_SIZE); /* zero any reserved bits that are not going to be filled */
3806 /* don't increment segno, need to try again saving the current one */
3807 } else if (kr != KERN_SUCCESS) {
3808 error = EINVAL;
3809 goto out;
3810 } else {
3811 offset += left_sz;
3812 ++segno;
3813 assert(offset <= SYSCTL_SEG_BUF_SIZE);
3814 }
3815 }
3816
3817 if (offset > 0) { /* write last chunk */
3818 error = SYSCTL_OUT(req, buf, offset);
3819 }
3820
3821 out:
3822 kfree_data(buf, SYSCTL_SEG_BUF_SIZE);
3823 return error;
3824 }
3825
3826 static int
3827 sysctl_compressor_segments(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3828 {
3829 return sysctl_compressor_segments_stream(req, VM_C_SERIALIZE_DATA_NONE);
3830 }
3831 SYSCTL_PROC(_vm, OID_AUTO, compressor_segments, CTLTYPE_STRUCT | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_compressor_segments, "S", "");
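/*
 * Illustrative usage sketch (userspace): the stream begins with the 4-byte
 * magic written above, followed by packed c_segment_info records. A consumer
 * should size the buffer with a NULL-oldp probe and validate the magic
 * against the kernel's record layout before parsing.
 *
 *	size_t len = 0;
 *	sysctlbyname("vm.compressor_segments", NULL, &len, NULL, 0); // probe size
 *	char *buf = malloc(len);
 *	if (buf != NULL &&
 *	    sysctlbyname("vm.compressor_segments", buf, &len, NULL, 0) == 0) {
 *		uint32_t magic;
 *		memcpy(&magic, buf, sizeof(magic));
 *		// parse the records following the header once magic is validated
 *	}
 */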
3832
3833 #if HAS_MTE
3834 static int
3835 sysctl_compressor_segments_data(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3836 {
3837 return sysctl_compressor_segments_stream(req, VM_C_SERIALIZE_DATA_TAGS);
3838 }
3839 SYSCTL_PROC(_vm, OID_AUTO, compressor_segments_data, CTLTYPE_STRUCT | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_compressor_segments_data, "S", "");
3840 #endif /* HAS_MTE */
3841
3842 extern uint32_t vm_compressor_fragmentation_level(void);
3843
3844 static int
3845 sysctl_compressor_fragmentation_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3846 {
3847 uint32_t value = vm_compressor_fragmentation_level();
3848 return SYSCTL_OUT(req, &value, sizeof(value));
3849 }
3850
3851 SYSCTL_PROC(_vm, OID_AUTO, compressor_fragmentation_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_compressor_fragmentation_level, "IU", "");
3852
3853 extern uint32_t vm_compressor_incore_fragmentation_wasted_pages(void);
3854
3855 static int
3856 sysctl_compressor_incore_fragmentation_wasted_pages(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3857 {
3858 uint32_t value = vm_compressor_incore_fragmentation_wasted_pages();
3859 return SYSCTL_OUT(req, &value, sizeof(value));
3860 }
3861
3862 SYSCTL_PROC(_vm, OID_AUTO, compressor_incore_fragmentation_wasted_pages, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_compressor_incore_fragmentation_wasted_pages, "IU", "");
3863
3864
3865
3866 #define SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE (8 * 1024)
3867
3868
3869 /* This sysctl iterates over all the entries of the vm_map of a given process and writes some info about the vm_object pointed to by each entry.
3870 * It can be used to map where a process's pages are located in the compressor.
3871 */
3872 static int
3873 sysctl_task_vm_objects_slotmap(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
3874 {
3875 int error = 0;
3876 char *buf = NULL;
3877 proc_t p = PROC_NULL;
3878 task_t task = TASK_NULL;
3879 vm_map_t map = VM_MAP_NULL;
3880 __block size_t offset = 0;
3881
3882 /* go from pid to proc to task to vm_map. see sysctl_procargsx() for another example of this progression */
3883 int *name = arg1;
3884 int namelen = arg2;
3885 if (namelen < 1) {
3886 return EINVAL;
3887 }
3888 int pid = name[0];
3889 p = proc_find(pid); /* this increments a reference to the proc */
3890 if (p == PROC_NULL) {
3891 return EINVAL;
3892 }
3893 task = proc_task(p);
3894 proc_rele(p); /* decrement ref of proc */
3895 p = PROC_NULL;
3896 if (task == TASK_NULL) {
3897 return EINVAL;
3898 }
3899 /* convert proc reference to task reference */
3900 task_reference(task);
3901 /* task reference to map reference */
3902 map = get_task_map_reference(task);
3903 task_deallocate(task);
3904
3905 if (map == VM_MAP_NULL) {
3906 return EINVAL; /* nothing allocated yet */
3907 }
3908
3909 buf = kalloc_data(SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE, Z_WAITOK | Z_ZERO);
3910 if (!buf) {
3911 error = ENOMEM;
3912 goto out;
3913 }
3914
3915 /* 4-byte header identifying the version of the data format.
3916 * This should be incremented if c_segment_info or c_slot_info changes */
3917 ((struct sysctl_buf_header*)buf)->magic = VM_MAP_ENTRY_INFO_MAGIC;
3918 offset += sizeof(uint32_t);
3919
3920 kern_return_t (^write_header)(int) = ^kern_return_t (int nentries) {
3921 /* write the header, happens only once at the beginning so we should have enough space */
3922 assert(offset + sizeof(struct vm_map_info_hdr) < SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE);
3923 struct vm_map_info_hdr* out_hdr = (struct vm_map_info_hdr*)(buf + offset);
3924 out_hdr->vmi_nentries = nentries;
3925 offset += sizeof(struct vm_map_info_hdr);
3926 return KERN_SUCCESS;
3927 };
3928
3929 kern_return_t (^write_entry)(void*) = ^kern_return_t (void* entry) {
3930 while (true) { /* try up to 2 times: first try writing into the current buffer, otherwise flush and retry with an empty one */
3931 size_t left_sz = SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE - offset;
3932 kern_return_t kr = vm_map_dump_entry_and_compressor_pager(entry, buf + offset, &left_sz);
3933 if (kr == KERN_NO_SPACE) {
3934 /* failed to write anything, flush the current buffer and try again */
3935 if (offset == 0) {
3936 return KERN_FAILURE; /* no space to write even though the buffer was empty; shouldn't happen */
3937 }
3938 /* write out chunk */
3939 int out_error = SYSCTL_OUT(req, buf, offset);
3940 if (out_error) {
3941 return KERN_FAILURE;
3942 }
3943 offset = 0;
3944 bzero(buf, SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE); /* zero any reserved bits that are not going to be filled */
3945 continue; /* need to retry the entry dump again with the cleaned buffer */
3946 } else if (kr != KERN_SUCCESS) {
3947 return kr;
3948 }
3949 offset += left_sz;
3950 break;
3951 }
3952 return KERN_SUCCESS;
3953 };
3954
3955 /* this foreach first invokes the header callback with the number of entries, then invokes the entry callback for every entry;
3956 * when the buffer is exhausted, it is flushed to the sysctl and reset */
3957 kern_return_t kr = vm_map_entries_foreach(map, write_header, write_entry);
3958
3959 if (kr != KERN_SUCCESS) {
3960 goto out;
3961 }
3962
3963 if (offset > 0) { /* last chunk */
3964 error = SYSCTL_OUT(req, buf, offset);
3965 }
3966
3967 out:
3968 if (buf != NULL) {
3969 kfree_data(buf, SYSCTL_VM_OBJECTS_SLOTMAP_BUF_SIZE);
3970 }
3971 if (map != NULL) {
3972 vm_map_deallocate(map);
3973 }
3974 return error;
3975 }
3976
3977 SYSCTL_PROC(_vm, OID_AUTO, task_vm_objects_slotmap, CTLTYPE_NODE | CTLFLAG_LOCKED | CTLFLAG_RD, 0, 0, sysctl_task_vm_objects_slotmap, "S", "");
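/*
 * Illustrative usage sketch (userspace): since this is a node-type sysctl,
 * the target pid travels as an extra MIB component, the same pattern
 * KERN_PROCARGS2 consumers use.
 *
 *	int mib[CTL_MAXNAME];
 *	size_t miblen = CTL_MAXNAME;
 *	sysctlnametomib("vm.task_vm_objects_slotmap", mib, &miblen);
 *	mib[miblen] = pid; // pid of the process to inspect (hypothetical)
 *	size_t len = 0;
 *	sysctl(mib, (u_int)miblen + 1, NULL, &len, NULL, 0); // probe the size
 */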
3978
3979 #pragma mark VM Host Statistics
3980
3981 SYSCTL_NODE(_vm, OID_AUTO, stat, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Host memory statistics");
3982
3983 SYSCTL_SCALABLE_COUNTER(_vm_stat, zero_fills, vm_statistics_zero_fill_count, "Pages zero-filled");
3984 SYSCTL_SCALABLE_COUNTER(_vm_stat, reactivations, vm_statistics_reactivations, "Pages reactivated");
3985 SYSCTL_SCALABLE_COUNTER(_vm_stat, pageins, vm_statistics_pageins, "Pages paged-in (including speculation)");
3986 SYSCTL_SCALABLE_COUNTER(_vm_stat, pageins_requested, vm_statistics_pageins_requested, "Page-ins requested");
3987 SYSCTL_SCALABLE_COUNTER(_vm_stat, pageins_aborted, vm_statistics_pageins_aborted, "Pages aborted during page-in");
3988 SYSCTL_SCALABLE_COUNTER(_vm_stat, pageouts, vm_statistics_pageouts, "Pages paged-out");
3989 SYSCTL_SCALABLE_COUNTER(_vm_stat, faults, vm_statistics_faults, "Pages faulted");
3990 SYSCTL_SCALABLE_COUNTER(_vm_stat, cow_faults, vm_statistics_cow_faults, "Pages faulted due to copy-on-write");
3991 SYSCTL_SCALABLE_COUNTER(_vm_stat, obj_cache_lookups, vm_statistics_lookups, "Pages looked up in the object-cache");
3992 SYSCTL_SCALABLE_COUNTER(_vm_stat, obj_cache_hits, vm_statistics_hits, "Object-cache lookup hits");
3993 SYSCTL_SCALABLE_COUNTER(_vm_stat, purges, vm_statistics_purges, "Pages purged");
3994 SYSCTL_SCALABLE_COUNTER(_vm_stat, decompressions, vm_statistics_decompressions, "Pages decompressed");
3995 SYSCTL_SCALABLE_COUNTER(_vm_stat, compressions, vm_statistics_compressions, "Pages compressed");
3996 SYSCTL_SCALABLE_COUNTER(_vm_stat, swapins, vm_statistics_swapins, "Pages swapped in");
3997 SYSCTL_SCALABLE_COUNTER(_vm_stat, swapouts, vm_statistics_swapouts, "Pages swapped out");
3998
3999 static int
4000 sysctl_vm_reset_tag SYSCTL_HANDLER_ARGS
4001 {
4002 #pragma unused(oidp, arg1, arg2)
4003 int error;
4004 int tag;
4005 kern_return_t kr;
4006
4007 /* Need to be root */
4008 if (!kauth_cred_issuser(kauth_cred_get())) {
4009 return EPERM;
4010 }
4011
4012 error = SYSCTL_IN(req, &tag, sizeof(tag));
4013 if (error) {
4014 return error;
4015 }
4016
4017 if (tag > VM_MAX_TAG_VALUE) {
4018 return EINVAL;
4019 }
4020
4021 kr = vm_tag_reset_peak((vm_tag_t)tag);
4022
4023 return mach_to_bsd_errno(kr);
4024 }
4025
4026 SYSCTL_PROC(_vm, OID_AUTO, reset_tag,
4027 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
4028 0, 0, &systctl_vm_reset_tag, "I", "");
4029
4030 static int
4031 sysctl_vm_reset_all_tags SYSCTL_HANDLER_ARGS
4032 {
4033 #pragma unused(oidp, arg1, arg2)
4034 /* Only reset the values if the sysctl is a write */
4035 if (!req->newptr) {
4036 return EINVAL;
4037 }
4038
4039 /* Need to be root */
4040 if (!kauth_cred_issuser(kauth_cred_get())) {
4041 return EPERM;
4042 }
4043
4044 vm_tag_reset_all_peaks();
4045
4046 return 0;
4047 }
4048
4049 SYSCTL_PROC(_vm, OID_AUTO, reset_all_tags,
4050 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
4051 0, 0, &systctl_vm_reset_all_tags, "I", "");
4052
4053 #endif /* DEVELOPMENT || DEBUG */
4054
4055 SYSCTL_NODE(_vm, OID_AUTO, compressor, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "VM Compressor");
4056
4057 SYSCTL_INT(_vm_compressor, OID_AUTO, mode, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_mode, 0, "");
4058 SYSCTL_INT(_vm_compressor, OID_AUTO, is_active, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_is_active, 0, "");
4059 SYSCTL_INT(_vm_compressor, OID_AUTO, is_available, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_available, 0, "");
4060 SYSCTL_UINT(_vm_compressor, OID_AUTO, pages_compressed, CTLFLAG_RD | CTLFLAG_LOCKED,
4061 &c_segment_pages_compressed, 0, "The amount of uncompressed data stored in the compressor (in pages)");
4062 #if CONFIG_FREEZE
4063 SYSCTL_UINT(_vm_compressor, OID_AUTO, pages_compressed_incore, CTLFLAG_RD | CTLFLAG_LOCKED,
4064 &c_segment_pages_compressed_incore, 0, "The amount of uncompressed data stored in the in-core compressor (in pages)");
4065 SYSCTL_UINT(_vm_compressor, OID_AUTO, pages_compressed_incore_late_swapout, CTLFLAG_RD | CTLFLAG_LOCKED,
4066 &c_segment_pages_compressed_incore_late_swapout, 0, "The amount of uncompressed data stored in the in-core compressor and queued for swapout (in pages)");
4067 #endif
4068 SYSCTL_UINT(_vm_compressor, OID_AUTO, pages_compressed_limit, CTLFLAG_RD | CTLFLAG_LOCKED,
4069 &c_segment_pages_compressed_limit, 0, "The limit on the amount of uncompressed data the compressor will store (in pages)");
4070
4071 SYSCTL_NODE(_vm_compressor, OID_AUTO, segment, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "VM Compressor Segment Counts");
4072 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, total, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_count, 0, "Number of allocated segments");
4073 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, aging, CTLFLAG_RD | CTLFLAG_LOCKED, &c_age_count, 0, "Number of aging segments");
4074 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swappedin_early, CTLFLAG_RD | CTLFLAG_LOCKED, &c_early_swappedin_count, 0, "Number of (early) swapped-in segments");
4075 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swappedin_regular, CTLFLAG_RD | CTLFLAG_LOCKED, &c_regular_swappedin_count, 0, "Number of (regular) swapped-in segments");
4076 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swappedin_late, CTLFLAG_RD | CTLFLAG_LOCKED, &c_late_swappedin_count, 0, "Number of (late) swapped-in segments");
4077 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swapout_early, CTLFLAG_RD | CTLFLAG_LOCKED, &c_early_swapout_count, 0, "Number of (early) ready-to-swap segments");
4078 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swapout_regular, CTLFLAG_RD | CTLFLAG_LOCKED, &c_regular_swapout_count, 0, "Number of (regular) ready-to-swap segments");
4079 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swapout_late, CTLFLAG_RD | CTLFLAG_LOCKED, &c_late_swapout_count, 0, "Number of (late) ready-to-swap segments");
4080 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swapio, CTLFLAG_RD | CTLFLAG_LOCKED, &c_swapio_count, 0, "Number of swapping-out segments");
4081 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swappedout, CTLFLAG_RD | CTLFLAG_LOCKED, &c_swappedout_count, 0, "Number of (non-sparse) swapped-out segments");
4082 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, swappedout_sparse, CTLFLAG_RD | CTLFLAG_LOCKED, &c_swappedout_sparse_count, 0, "Number of (sparse) swapped-out segments");
4083 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, majorcompact, CTLFLAG_RD | CTLFLAG_LOCKED, &c_major_count, 0, "Number of recently-compacted segments");
4084 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, minorcompact, CTLFLAG_RD | CTLFLAG_LOCKED, &c_minor_count, 0, "Number of segments queued for deferred minor compaction");
4085 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, filling, CTLFLAG_RD | CTLFLAG_LOCKED, &c_filling_count, 0, "Number of filling segments");
4086 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, empty, CTLFLAG_RD | CTLFLAG_LOCKED, &c_empty_count, 0, "Number of empty segments");
4087 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, bad, CTLFLAG_RD | CTLFLAG_LOCKED, &c_bad_count, 0, "Number of bad segments");
4088 SYSCTL_UINT(_vm_compressor_segment, OID_AUTO, limit, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segments_limit, 0, "Limit on the number of allocated segments");
4089
4090 SYSCTL_NODE(_vm_compressor, OID_AUTO, svp, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "VM Compressor Single-Value");
4091 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, in_hash, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_in_hash, 0, "");
4092 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, hash_succeeded, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_hash_succeeded, 0, "");
4093 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, hash_failed, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_hash_failed, 0, "");
4094 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, zval_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_zero_compressions, 0, "");
4095 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, zval_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_zero_decompressions, 0, "");
4096 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, nzval_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_nonzero_compressions, 0, "");
4097 SYSCTL_UINT(_vm_compressor_svp, OID_AUTO, nzval_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_svp_nonzero_decompressions, 0, "");
4098
4099 SYSCTL_NODE(_vm_compressor, OID_AUTO, compactor, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "VM Compressor Compactor");
4100 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compactions_completed, CTLFLAG_RD | CTLFLAG_LOCKED,
4101 &vm_pageout_vminfo.vm_compactor_major_compactions_completed, "Major compactions completed");
4102 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compactions_considered, CTLFLAG_RD | CTLFLAG_LOCKED,
4103 &vm_pageout_vminfo.vm_compactor_major_compactions_considered, "Major compactions considered");
4104 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compactions_bailed, CTLFLAG_RD | CTLFLAG_LOCKED,
4105 &vm_pageout_vminfo.vm_compactor_major_compactions_bailed, "Major compactions bailed (due to contention)");
4106 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compaction_bytes_moved, CTLFLAG_RD | CTLFLAG_LOCKED,
4107 &vm_pageout_vminfo.vm_compactor_major_compaction_bytes_moved, "Bytes moved between segments during major compactions");
4108 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compaction_slots_moved, CTLFLAG_RD | CTLFLAG_LOCKED,
4109 &vm_pageout_vminfo.vm_compactor_major_compaction_slots_moved, "Slots moved between segments during major compactions");
4110 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compaction_bytes_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
4111 &vm_pageout_vminfo.vm_compactor_major_compaction_bytes_freed, "Bytes freed as a result of major compaction");
4112 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, major_compaction_segments_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
4113 &vm_pageout_vminfo.vm_compactor_major_compaction_segments_freed, "Segments freed as a result of major compaction");
4114 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, swapouts_queued, CTLFLAG_RD | CTLFLAG_LOCKED,
4115 &vm_pageout_vminfo.vm_compactor_swapouts_queued, "The number of segments queued for swapout after a major compaction");
4116 SYSCTL_QUAD(_vm_compressor_compactor, OID_AUTO, swapout_bytes_wasted, CTLFLAG_RD | CTLFLAG_LOCKED,
4117 &vm_pageout_vminfo.vm_compactor_swapout_bytes_wasted, "The number of unused bytes in segments queued for swapout");
4118