1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include "vm/vm_map.h"
67 #include <mach/vm_types.h>
68 #include <mach_assert.h>
69
70 #include <vm/vm_options.h>
71
72 #include <libkern/OSAtomic.h>
73
74 #include <mach/kern_return.h>
75 #include <mach/port.h>
76 #include <mach/vm_attributes.h>
77 #include <mach/vm_param.h>
78 #include <mach/vm_behavior.h>
79 #include <mach/vm_statistics.h>
80 #include <mach/memory_object.h>
81 #include <mach/mach_vm.h>
82 #include <machine/cpu_capabilities.h>
83 #include <mach/sdt.h>
84
85 #include <kern/assert.h>
86 #include <kern/backtrace.h>
87 #include <kern/counter.h>
88 #include <kern/exc_guard.h>
89 #include <kern/kalloc.h>
90 #include <kern/zalloc_internal.h>
91
92 #include <vm/cpm.h>
93 #include <vm/vm_compressor.h>
94 #include <vm/vm_compressor_pager.h>
95 #include <vm/vm_init.h>
96 #include <vm/vm_fault.h>
97 #include <vm/vm_map_internal.h>
98 #include <vm/vm_object.h>
99 #include <vm/vm_page.h>
100 #include <vm/vm_pageout.h>
101 #include <vm/pmap.h>
102 #include <vm/vm_kern.h>
103 #include <ipc/ipc_port.h>
104 #include <kern/sched_prim.h>
105 #include <kern/misc_protos.h>
106
107 #include <mach/vm_map_server.h>
108 #include <mach/mach_host_server.h>
109 #include <vm/vm_protos.h>
110 #include <vm/vm_purgeable_internal.h>
111 #include <vm/vm_reclaim_internal.h>
112
113 #include <vm/vm_protos.h>
114 #include <vm/vm_shared_region.h>
115 #include <vm/vm_map_store.h>
116
117 #include <san/kasan.h>
118
119 #include <sys/resource.h>
120 #include <sys/codesign.h>
121 #include <sys/code_signing.h>
122 #include <sys/mman.h>
123 #include <sys/reboot.h>
124 #include <sys/kdebug_triage.h>
125
126 #include <libkern/section_keywords.h>
127
128 #if DEVELOPMENT || DEBUG
129 extern int proc_selfcsflags(void);
130 int vm_log_xnu_user_debug = 0;
131 int panic_on_unsigned_execute = 0;
132 int panic_on_mlock_failure = 0;
133 #endif /* DEVELOPMENT || DEBUG */
134
135 #if MACH_ASSERT
136 int debug4k_filter = 0;
137 char debug4k_proc_name[1024] = "";
138 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
139 int debug4k_panic_on_misaligned_sharing = 0;
140 const char *debug4k_category_name[] = {
141 "error", /* 0 */
142 "life", /* 1 */
143 "load", /* 2 */
144 "fault", /* 3 */
145 "copy", /* 4 */
146 "share", /* 5 */
147 "adjust", /* 6 */
148 "pmap", /* 7 */
149 "mementry", /* 8 */
150 "iokit", /* 9 */
151 "upl", /* 10 */
152 "exc", /* 11 */
153 "vfs" /* 12 */
154 };
155 #endif /* MACH_ASSERT */
156 int debug4k_no_cow_copyin = 0;
157
158
159 #if __arm64__
160 extern const int fourk_binary_compatibility_unsafe;
161 extern const int fourk_binary_compatibility_allow_wx;
162 #endif /* __arm64__ */
163 extern int proc_selfpid(void);
164 extern char *proc_name_address(void *p);
165 extern char *proc_best_name(struct proc *p);
166
167 #if VM_MAP_DEBUG_APPLE_PROTECT
168 int vm_map_debug_apple_protect = 0;
169 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
170 #if VM_MAP_DEBUG_FOURK
171 int vm_map_debug_fourk = 0;
172 #endif /* VM_MAP_DEBUG_FOURK */
173
174 #if DEBUG || DEVELOPMENT
175 static TUNABLE(bool, vm_map_executable_immutable,
176 "vm_map_executable_immutable", true);
177 #else
178 #define vm_map_executable_immutable true
179 #endif
180
181 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
182
183 extern u_int32_t random(void); /* from <libkern/libkern.h> */
184 /* Internal prototypes
185 */
186
187 typedef struct vm_map_zap {
188 vm_map_entry_t vmz_head;
189 vm_map_entry_t *vmz_tail;
190 } *vm_map_zap_t;
191
192 #define VM_MAP_ZAP_DECLARE(zap) \
193 struct vm_map_zap zap = { .vmz_tail = &zap.vmz_head }
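/*
 * A typical zap-list lifecycle, as used by vm_map_destroy() further down:
 * entries unlinked while the map lock is held are parked on the zap list,
 * and their object/submap references are only dropped once the lock has
 * been released (a sketch; "start", "end" and "flags" stand for the
 * caller's values):
 *
 *	VM_MAP_ZAP_DECLARE(zap);
 *	vm_map_lock(map);
 *	(void)vm_map_delete(map, start, end, flags, KMEM_GUARD_NONE, &zap);
 *	vm_map_unlock(map);
 *	vm_map_zap_dispose(&zap);
 */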
194
195 static vm_map_entry_t vm_map_entry_insert(
196 vm_map_t map,
197 vm_map_entry_t insp_entry,
198 vm_map_offset_t start,
199 vm_map_offset_t end,
200 vm_object_t object,
201 vm_object_offset_t offset,
202 vm_map_kernel_flags_t vmk_flags,
203 boolean_t needs_copy,
204 vm_prot_t cur_protection,
205 vm_prot_t max_protection,
206 vm_inherit_t inheritance,
207 boolean_t clear_map_aligned);
208
209 static void vm_map_simplify_range(
210 vm_map_t map,
211 vm_map_offset_t start,
212 vm_map_offset_t end); /* forward */
213
214 static boolean_t vm_map_range_check(
215 vm_map_t map,
216 vm_map_offset_t start,
217 vm_map_offset_t end,
218 vm_map_entry_t *entry);
219
220 static void vm_map_submap_pmap_clean(
221 vm_map_t map,
222 vm_map_offset_t start,
223 vm_map_offset_t end,
224 vm_map_t sub_map,
225 vm_map_offset_t offset);
226
227 static void vm_map_pmap_enter(
228 vm_map_t map,
229 vm_map_offset_t addr,
230 vm_map_offset_t end_addr,
231 vm_object_t object,
232 vm_object_offset_t offset,
233 vm_prot_t protection);
234
235 static void _vm_map_clip_end(
236 struct vm_map_header *map_header,
237 vm_map_entry_t entry,
238 vm_map_offset_t end);
239
240 static void _vm_map_clip_start(
241 struct vm_map_header *map_header,
242 vm_map_entry_t entry,
243 vm_map_offset_t start);
244
245 static kmem_return_t vm_map_delete(
246 vm_map_t map,
247 vm_map_offset_t start,
248 vm_map_offset_t end,
249 vmr_flags_t flags,
250 kmem_guard_t guard,
251 vm_map_zap_t zap);
252
253 static void vm_map_copy_insert(
254 vm_map_t map,
255 vm_map_entry_t after_where,
256 vm_map_copy_t copy);
257
258 static kern_return_t vm_map_copy_overwrite_unaligned(
259 vm_map_t dst_map,
260 vm_map_entry_t entry,
261 vm_map_copy_t copy,
262 vm_map_address_t start,
263 boolean_t discard_on_success);
264
265 static kern_return_t vm_map_copy_overwrite_aligned(
266 vm_map_t dst_map,
267 vm_map_entry_t tmp_entry,
268 vm_map_copy_t copy,
269 vm_map_offset_t start,
270 pmap_t pmap);
271
272 static kern_return_t vm_map_copyin_kernel_buffer(
273 vm_map_t src_map,
274 vm_map_address_t src_addr,
275 vm_map_size_t len,
276 boolean_t src_destroy,
277 vm_map_copy_t *copy_result); /* OUT */
278
279 static kern_return_t vm_map_copyout_kernel_buffer(
280 vm_map_t map,
281 vm_map_address_t *addr, /* IN/OUT */
282 vm_map_copy_t copy,
283 vm_map_size_t copy_size,
284 boolean_t overwrite,
285 boolean_t consume_on_success);
286
287 static void vm_map_fork_share(
288 vm_map_t old_map,
289 vm_map_entry_t old_entry,
290 vm_map_t new_map);
291
292 static boolean_t vm_map_fork_copy(
293 vm_map_t old_map,
294 vm_map_entry_t *old_entry_p,
295 vm_map_t new_map,
296 int vm_map_copyin_flags);
297
298 static kern_return_t vm_map_wire_nested(
299 vm_map_t map,
300 vm_map_offset_t start,
301 vm_map_offset_t end,
302 vm_prot_t caller_prot,
303 vm_tag_t tag,
304 boolean_t user_wire,
305 pmap_t map_pmap,
306 vm_map_offset_t pmap_addr,
307 ppnum_t *physpage_p);
308
309 static kern_return_t vm_map_unwire_nested(
310 vm_map_t map,
311 vm_map_offset_t start,
312 vm_map_offset_t end,
313 boolean_t user_wire,
314 pmap_t map_pmap,
315 vm_map_offset_t pmap_addr);
316
317 static kern_return_t vm_map_overwrite_submap_recurse(
318 vm_map_t dst_map,
319 vm_map_offset_t dst_addr,
320 vm_map_size_t dst_size);
321
322 static kern_return_t vm_map_copy_overwrite_nested(
323 vm_map_t dst_map,
324 vm_map_offset_t dst_addr,
325 vm_map_copy_t copy,
326 boolean_t interruptible,
327 pmap_t pmap,
328 boolean_t discard_on_success);
329
330 static kern_return_t vm_map_remap_extract(
331 vm_map_t map,
332 vm_map_offset_t addr,
333 vm_map_size_t size,
334 boolean_t copy,
335 vm_map_copy_t map_copy,
336 vm_prot_t *cur_protection,
337 vm_prot_t *max_protection,
338 vm_inherit_t inheritance,
339 vm_map_kernel_flags_t vmk_flags);
340
341 static kern_return_t vm_map_remap_range_allocate(
342 vm_map_t map,
343 vm_map_address_t *address,
344 vm_map_size_t size,
345 vm_map_offset_t mask,
346 vm_map_kernel_flags_t vmk_flags,
347 vm_map_entry_t *map_entry,
348 vm_map_zap_t zap_list);
349
350 static void vm_map_region_look_for_page(
351 vm_map_t map,
352 vm_map_offset_t va,
353 vm_object_t object,
354 vm_object_offset_t offset,
355 int max_refcnt,
356 unsigned short depth,
357 vm_region_extended_info_t extended,
358 mach_msg_type_number_t count);
359
360 static int vm_map_region_count_obj_refs(
361 vm_map_entry_t entry,
362 vm_object_t object);
363
364
365 static kern_return_t vm_map_willneed(
366 vm_map_t map,
367 vm_map_offset_t start,
368 vm_map_offset_t end);
369
370 static kern_return_t vm_map_reuse_pages(
371 vm_map_t map,
372 vm_map_offset_t start,
373 vm_map_offset_t end);
374
375 static kern_return_t vm_map_reusable_pages(
376 vm_map_t map,
377 vm_map_offset_t start,
378 vm_map_offset_t end);
379
380 static kern_return_t vm_map_can_reuse(
381 vm_map_t map,
382 vm_map_offset_t start,
383 vm_map_offset_t end);
384
385 static kern_return_t vm_map_random_address_for_size(
386 vm_map_t map,
387 vm_map_offset_t *address,
388 vm_map_size_t size,
389 vm_map_kernel_flags_t vmk_flags);
390
391
392 #if CONFIG_MAP_RANGES
393
394 static vm_map_range_id_t vm_map_user_range_resolve(
395 vm_map_t map,
396 mach_vm_address_t addr,
397 mach_vm_address_t size,
398 mach_vm_range_t range);
399
400 #endif /* CONFIG_MAP_RANGES */
401 #if MACH_ASSERT
402 static kern_return_t vm_map_pageout(
403 vm_map_t map,
404 vm_map_offset_t start,
405 vm_map_offset_t end);
406 #endif /* MACH_ASSERT */
407
408 kern_return_t vm_map_corpse_footprint_collect(
409 vm_map_t old_map,
410 vm_map_entry_t old_entry,
411 vm_map_t new_map);
412 void vm_map_corpse_footprint_collect_done(
413 vm_map_t new_map);
414 void vm_map_corpse_footprint_destroy(
415 vm_map_t map);
416 kern_return_t vm_map_corpse_footprint_query_page_info(
417 vm_map_t map,
418 vm_map_offset_t va,
419 int *disposition_p);
420 void vm_map_footprint_query_page_info(
421 vm_map_t map,
422 vm_map_entry_t map_entry,
423 vm_map_offset_t curr_s_offset,
424 int *disposition_p);
425
426 #if CONFIG_MAP_RANGES
427 static void vm_map_range_map_init(void);
428 #endif /* CONFIG_MAP_RANGES */
429
430 pid_t find_largest_process_vm_map_entries(void);
431
432 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
433 mach_exception_data_type_t subcode);
434
435 /*
436 * Macros to copy a vm_map_entry. We must be careful to correctly
437 * manage the wired page count. vm_map_entry_copy() creates a new
438 * map entry to the same memory - the wired count in the new entry
439 * must be set to zero. vm_map_entry_copy_full() creates a new
440 * entry that is identical to the old entry. This preserves the
441 * wire count; it's used for map splitting and zone changing in
442 * vm_map_copyout.
443 */
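/*
 * Illustrative contrast (see the helpers below): after
 * vm_map_entry_copy_full(new, old) the new entry is bit-for-bit identical,
 * including wired_count/user_wired_count, whereas vm_map_entry_copy()
 * additionally clears the wire counts and the transient state
 * (is_shared, in_transition, needs_wakeup, ...) because the new mapping
 * does not yet own any wired pages.
 */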
444
445 static inline void
446 vm_map_entry_copy_csm_assoc(
447 vm_map_t map __unused,
448 vm_map_entry_t new __unused,
449 vm_map_entry_t old __unused)
450 {
451 #if CODE_SIGNING_MONITOR
452 /* when code signing monitor is enabled, we want to reset on copy */
453 new->csm_associated = FALSE;
454 #else
455 /* when code signing monitor is not enabled, assert as a sanity check */
456 assert(new->csm_associated == FALSE);
457 #endif
458 #if DEVELOPMENT || DEBUG
459 if (new->vme_xnu_user_debug && vm_log_xnu_user_debug) {
460 printf("FBDP %d[%s] %s:%d map %p entry %p [ 0x%llx 0x%llx ] resetting vme_xnu_user_debug\n",
461 proc_selfpid(),
462 (get_bsdtask_info(current_task())
463 ? proc_name_address(get_bsdtask_info(current_task()))
464 : "?"),
465 __FUNCTION__, __LINE__,
466 map, new, new->vme_start, new->vme_end);
467 }
468 #endif /* DEVELOPMENT || DEBUG */
469 new->vme_xnu_user_debug = FALSE;
470 }
471
472 /*
473 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
474 * But for security reasons on some platforms, we don't want the
475 * new mapping to be "used for jit", so we reset the flag here.
476 */
477 static inline void
478 vm_map_entry_copy_code_signing(
479 vm_map_t map,
480 vm_map_entry_t new,
481 vm_map_entry_t old __unused)
482 {
483 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
484 assert(new->used_for_jit == old->used_for_jit);
485 } else {
486 new->used_for_jit = FALSE;
487 }
488 }
489
490 static inline void
491 vm_map_entry_copy_full(
492 vm_map_entry_t new,
493 vm_map_entry_t old)
494 {
495 #if MAP_ENTRY_CREATION_DEBUG
496 btref_put(new->vme_creation_bt);
497 btref_retain(old->vme_creation_bt);
498 #endif
499 #if MAP_ENTRY_INSERTION_DEBUG
500 btref_put(new->vme_insertion_bt);
501 btref_retain(old->vme_insertion_bt);
502 #endif
503 *new = *old;
504 }
505
506 static inline void
507 vm_map_entry_copy(
508 vm_map_t map,
509 vm_map_entry_t new,
510 vm_map_entry_t old)
511 {
512 vm_map_entry_copy_full(new, old);
513
514 new->is_shared = FALSE;
515 new->needs_wakeup = FALSE;
516 new->in_transition = FALSE;
517 new->wired_count = 0;
518 new->user_wired_count = 0;
519 new->vme_permanent = FALSE;
520 vm_map_entry_copy_code_signing(map, new, old);
521 vm_map_entry_copy_csm_assoc(map, new, old);
522 if (new->iokit_acct) {
523 assertf(!new->use_pmap, "old %p new %p\n", old, new);
524 new->iokit_acct = FALSE;
525 new->use_pmap = TRUE;
526 }
527 new->vme_resilient_codesign = FALSE;
528 new->vme_resilient_media = FALSE;
529 new->vme_atomic = FALSE;
530 new->vme_no_copy_on_read = FALSE;
531 }
532
533 /*
534 * Normal lock_read_to_write() returns FALSE/0 on failure.
535 * These functions evaluate to zero on success and non-zero value on failure.
536 */
537 __attribute__((always_inline))
538 int
539 vm_map_lock_read_to_write(vm_map_t map)
540 {
541 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
542 DTRACE_VM(vm_map_lock_upgrade);
543 return 0;
544 }
545 return 1;
546 }
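/*
 * Typical caller pattern (a sketch; the "RetryLookup" label is
 * hypothetical): a failed upgrade drops the lock entirely, so the caller
 * must re-take the read lock and re-validate whatever it had looked up:
 *
 *	if (vm_map_lock_read_to_write(map)) {
 *		vm_map_lock_read(map);
 *		goto RetryLookup;
 *	}
 *	... modify the map, then vm_map_unlock(map) ...
 */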
547
548 __attribute__((always_inline))
549 boolean_t
550 vm_map_try_lock(vm_map_t map)
551 {
552 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
553 DTRACE_VM(vm_map_lock_w);
554 return TRUE;
555 }
556 return FALSE;
557 }
558
559 __attribute__((always_inline))
560 boolean_t
561 vm_map_try_lock_read(vm_map_t map)
562 {
563 if (lck_rw_try_lock_shared(&(map)->lock)) {
564 DTRACE_VM(vm_map_lock_r);
565 return TRUE;
566 }
567 return FALSE;
568 }
569
570 /*!
571 * @function kdp_vm_map_is_acquired_exclusive
572 *
573 * @abstract
574 * Checks if vm map is acquired exclusive.
575 *
576 * @discussion
577 * NOT SAFE: To be used only by kernel debugger.
578 *
579 * @param map map to check
580 *
581 * @returns TRUE if the map is acquired exclusively.
582 */
583 boolean_t
584 kdp_vm_map_is_acquired_exclusive(vm_map_t map)
585 {
586 return kdp_lck_rw_lock_is_acquired_exclusive(&map->lock);
587 }
588
589 /*
590 * Routines to get the page size the caller should
591 * use while inspecting the target address space.
592 * Use the "_safely" variant if the caller is dealing with a user-provided
593 * array whose size depends on the page size, to avoid any overflow or
594 * underflow of a user-allocated buffer.
595 */
596 int
597 vm_self_region_page_shift_safely(
598 vm_map_t target_map)
599 {
600 int effective_page_shift = 0;
601
602 if (PAGE_SIZE == (4096)) {
603 /* x86_64 and 4k watches: always use 4k */
604 return PAGE_SHIFT;
605 }
606 /* did caller provide an explicit page size for this thread to use? */
607 effective_page_shift = thread_self_region_page_shift();
608 if (effective_page_shift) {
609 /* use the explicitly-provided page size */
610 return effective_page_shift;
611 }
612 /* no explicit page size: use the caller's page size... */
613 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
614 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
615 /* page size match: safe to use */
616 return effective_page_shift;
617 }
618 /* page size mismatch */
619 return -1;
620 }
621 int
622 vm_self_region_page_shift(
623 vm_map_t target_map)
624 {
625 int effective_page_shift;
626
627 effective_page_shift = vm_self_region_page_shift_safely(target_map);
628 if (effective_page_shift == -1) {
629 /* no safe value but OK to guess for caller */
630 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
631 VM_MAP_PAGE_SHIFT(target_map));
632 }
633 return effective_page_shift;
634 }
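/*
 * Usage note (a sketch; the error code is an illustrative choice): when the
 * page shift is used to size a user-provided, per-page array, prefer the
 * "_safely" variant and fail on a mismatch rather than guessing:
 *
 *	int shift = vm_self_region_page_shift_safely(target_map);
 *	if (shift == -1) {
 *		return KERN_INVALID_ARGUMENT;
 *	}
 *	num_pages = (unsigned int)(size >> shift);
 */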
635
636
637 /*
638 * Decide if we want to allow processes to execute from their data or stack areas.
639 * override_nx() returns true if we do. Data/stack execution can be enabled independently
640 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
641 * or allow_stack_exec to enable data execution for that type of data area for that particular
642 * ABI (or both by or'ing the flags together). These are initialized in the architecture
643 * specific pmap files since the default behavior varies according to architecture. The
644 * main reason it varies is because of the need to provide binary compatibility with old
645 * applications that were written before these restrictions came into being. In the old
646 * days, an app could execute anything it could read, but this has slowly been tightened
647 * up over time. The default behavior is:
648 *
649 * 32-bit PPC apps may execute from both stack and data areas
650 * 32-bit Intel apps may execute from data areas but not stack
651 * 64-bit PPC/Intel apps may not execute from either data or stack
652 *
653 * An application on any architecture may override these defaults by explicitly
654 * adding PROT_EXEC permission to the page in question with the mprotect(2)
655 * system call. This code here just determines what happens when an app tries to
656 * execute from a page that lacks execute permission.
657 *
658 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
659 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
660 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
661 * execution from data areas for a particular binary even if the arch normally permits it. As
662 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
663 * to support some complicated use cases, notably browsers with out-of-process plugins that
664 * are not all NX-safe.
665 */
666
667 extern int allow_data_exec, allow_stack_exec;
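/*
 * Worked example of the bitmask convention used by override_nx() below:
 * with allow_data_exec == VM_ABI_32 and allow_stack_exec == 0, a 32-bit
 * process gets (allow_data_exec & VM_ABI_32) != 0, so execution from data
 * areas is allowed (unless map_disallow_data_exec is set), while a 64-bit
 * process and any VM_MEMORY_STACK mapping are still denied.  Or the flags
 * together (VM_ABI_32 | VM_ABI_64) to enable the override for both ABIs.
 */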
668
669 int
670 override_nx(vm_map_t map, uint32_t user_tag)   /* map unused on arm */
671 {
672 int current_abi;
673
674 if (map->pmap == kernel_pmap) {
675 return FALSE;
676 }
677
678 /*
679 * Determine if the app is running in 32 or 64 bit mode.
680 */
681
682 if (vm_map_is_64bit(map)) {
683 current_abi = VM_ABI_64;
684 } else {
685 current_abi = VM_ABI_32;
686 }
687
688 /*
689 * Determine if we should allow the execution based on whether it's a
690 * stack or data area and the current architecture.
691 */
692
693 if (user_tag == VM_MEMORY_STACK) {
694 return allow_stack_exec & current_abi;
695 }
696
697 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
698 }
699
700
701 /*
702 * Virtual memory maps provide for the mapping, protection,
703 * and sharing of virtual memory objects. In addition,
704 * this module provides for an efficient virtual copy of
705 * memory from one map to another.
706 *
707 * Synchronization is required prior to most operations.
708 *
709 * Maps consist of an ordered doubly-linked list of simple
710 * entries; a single hint is used to speed up lookups.
711 *
712 * Sharing maps have been deleted from this version of Mach.
713 * All shared objects are now mapped directly into the respective
714 * maps. This requires a change in the copy on write strategy;
715 * the asymmetric (delayed) strategy is used for shared temporary
716 * objects instead of the symmetric (shadow) strategy. All maps
717 * are now "top level" maps (either task map, kernel map or submap
718 * of the kernel map).
719 *
720 * Since portions of maps are specified by start/end addresses,
721 * which may not align with existing map entries, all
722 * routines merely "clip" entries to these start/end values.
723 * [That is, an entry is split into two, bordering at a
724 * start or end value.] Note that these clippings may not
725 * always be necessary (as the two resulting entries are then
726 * not changed); however, the clipping is done for convenience.
727 * No attempt is currently made to "glue back together" two
728 * abutting entries.
729 *
730 * The symmetric (shadow) copy strategy implements virtual copy
731 * by copying VM object references from one map to
732 * another, and then marking both regions as copy-on-write.
733 * It is important to note that only one writeable reference
734 * to a VM object region exists in any map when this strategy
735 * is used -- this means that shadow object creation can be
736 * delayed until a write operation occurs. The asymmetric (delayed)
737 * strategy allows multiple maps to have writeable references to
738 * the same region of a vm object, and hence cannot delay creating
739 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
740 * Copying of permanent objects is completely different; see
741 * vm_object_copy_strategically() in vm_object.c.
742 */
743
744 ZONE_DECLARE_ID(ZONE_ID_VM_MAP_COPY, struct vm_map_copy);
745
746 #define VM_MAP_ZONE_NAME "maps"
747 #define VM_MAP_ZFLAGS (ZC_NOENCRYPT | ZC_VM)
748
749 #define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
750 #define VM_MAP_ENTRY_ZFLAGS (ZC_NOENCRYPT | ZC_VM)
751
752 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
753 #define VM_MAP_HOLES_ZFLAGS (ZC_NOENCRYPT | ZC_VM)
754
755 /*
756 * Asserts that a vm_map_copy object is coming from the
757 * vm_map_copy_zone to ensure that it isn't a fake constructed
758 * anywhere else.
759 */
760 void
761 vm_map_copy_require(struct vm_map_copy *copy)
762 {
763 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
764 }
765
766 /*
767 * vm_map_require:
768 *
769 * Ensures that the argument is memory allocated from the genuine
770 * vm map zone. (See zone_id_require_allow_foreign).
771 */
772 void
773 vm_map_require(vm_map_t map)
774 {
775 zone_id_require(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
776 }
777
778 #define VM_MAP_EARLY_COUNT_MAX 16
779 static __startup_data vm_offset_t map_data;
780 static __startup_data vm_size_t map_data_size;
781 static __startup_data vm_offset_t kentry_data;
782 static __startup_data vm_size_t kentry_data_size;
783 static __startup_data vm_offset_t map_holes_data;
784 static __startup_data vm_size_t map_holes_data_size;
785 static __startup_data vm_map_t *early_map_owners[VM_MAP_EARLY_COUNT_MAX];
786 static __startup_data uint32_t early_map_count;
787
788 #if XNU_TARGET_OS_OSX
789 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
790 #else /* XNU_TARGET_OS_OSX */
791 #define NO_COALESCE_LIMIT 0
792 #endif /* XNU_TARGET_OS_OSX */
793
794 /* Skip acquiring locks if we're in the midst of a kernel core dump */
795 unsigned int not_in_kdp = 1;
796
797 unsigned int vm_map_set_cache_attr_count = 0;
798
799 kern_return_t
800 vm_map_set_cache_attr(
801 vm_map_t map,
802 vm_map_offset_t va)
803 {
804 vm_map_entry_t map_entry;
805 vm_object_t object;
806 kern_return_t kr = KERN_SUCCESS;
807
808 vm_map_lock_read(map);
809
810 if (!vm_map_lookup_entry(map, va, &map_entry) ||
811 map_entry->is_sub_map) {
812 /*
813 * that memory is not properly mapped
814 */
815 kr = KERN_INVALID_ARGUMENT;
816 goto done;
817 }
818 object = VME_OBJECT(map_entry);
819
820 if (object == VM_OBJECT_NULL) {
821 /*
822 * there should be a VM object here at this point
823 */
824 kr = KERN_INVALID_ARGUMENT;
825 goto done;
826 }
827 vm_object_lock(object);
828 object->set_cache_attr = TRUE;
829 vm_object_unlock(object);
830
831 vm_map_set_cache_attr_count++;
832 done:
833 vm_map_unlock_read(map);
834
835 return kr;
836 }
837
838
839 #if CONFIG_CODE_DECRYPTION
840 /*
841 * vm_map_apple_protected:
842 * This remaps the requested part of the object with an object backed by
843 * the decrypting pager.
844 * crypt_info contains entry points and session data for the crypt module.
845 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
846 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
847 */
848 kern_return_t
849 vm_map_apple_protected(
850 vm_map_t map,
851 vm_map_offset_t start,
852 vm_map_offset_t end,
853 vm_object_offset_t crypto_backing_offset,
854 struct pager_crypt_info *crypt_info,
855 uint32_t cryptid)
856 {
857 boolean_t map_locked;
858 kern_return_t kr;
859 vm_map_entry_t map_entry;
860 struct vm_map_entry tmp_entry;
861 memory_object_t unprotected_mem_obj;
862 vm_object_t protected_object;
863 vm_map_offset_t map_addr;
864 vm_map_offset_t start_aligned, end_aligned;
865 vm_object_offset_t crypto_start, crypto_end;
866 boolean_t cache_pager;
867
868 map_locked = FALSE;
869 unprotected_mem_obj = MEMORY_OBJECT_NULL;
870
871 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
872 return KERN_INVALID_ADDRESS;
873 }
874 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
875 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
876 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
877 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
878
879 #if __arm64__
880 /*
881 * "start" and "end" might be 4K-aligned but not 16K-aligned,
882 * so we might have to loop and establish up to 3 mappings:
883 *
884 * + the first 16K-page, which might overlap with the previous
885 * 4K-aligned mapping,
886 * + the center,
887 * + the last 16K-page, which might overlap with the next
888 * 4K-aligned mapping.
889 * Each of these mapping might be backed by a vnode pager (if
890 * properly page-aligned) or a "fourk_pager", itself backed by a
891 * vnode pager (if 4K-aligned but not page-aligned).
892 * vnode pager (if 4K-aligned but not page-aligned).
 */
893 #endif /* __arm64__ */
894
895 map_addr = start_aligned;
896 for (map_addr = start_aligned;
897 map_addr < end;
898 map_addr = tmp_entry.vme_end) {
899 vm_map_lock(map);
900 map_locked = TRUE;
901
902 /* lookup the protected VM object */
903 if (!vm_map_lookup_entry(map,
904 map_addr,
905 &map_entry) ||
906 map_entry->is_sub_map ||
907 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
908 /* that memory is not properly mapped */
909 kr = KERN_INVALID_ARGUMENT;
910 goto done;
911 }
912
913 /* ensure mapped memory is mapped as executable, except
914 * for the model decryption flow */
915 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
916 !(map_entry->protection & VM_PROT_EXECUTE)) {
917 kr = KERN_INVALID_ARGUMENT;
918 goto done;
919 }
920
921 /* get the protected object to be decrypted */
922 protected_object = VME_OBJECT(map_entry);
923 if (protected_object == VM_OBJECT_NULL) {
924 /* there should be a VM object here at this point */
925 kr = KERN_INVALID_ARGUMENT;
926 goto done;
927 }
928 /* ensure protected object stays alive while map is unlocked */
929 vm_object_reference(protected_object);
930
931 /* limit the map entry to the area we want to cover */
932 vm_map_clip_start(map, map_entry, start_aligned);
933 vm_map_clip_end(map, map_entry, end_aligned);
934
935 tmp_entry = *map_entry;
936 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
937 vm_map_unlock(map);
938 map_locked = FALSE;
939
940 /*
941 * This map entry might be only partially encrypted
942 * (if not fully "page-aligned").
943 */
944 crypto_start = 0;
945 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
946 if (tmp_entry.vme_start < start) {
947 if (tmp_entry.vme_start != start_aligned) {
948 kr = KERN_INVALID_ADDRESS;
949 }
950 crypto_start += (start - tmp_entry.vme_start);
951 }
952 if (tmp_entry.vme_end > end) {
953 if (tmp_entry.vme_end != end_aligned) {
954 kr = KERN_INVALID_ADDRESS;
955 }
956 crypto_end -= (tmp_entry.vme_end - end);
957 }
958
959 /*
960 * This "extra backing offset" is needed to get the decryption
961 * routine to use the right key. It adjusts for the possibly
962 * relative offset of an interposed "4K" pager...
963 */
964 if (crypto_backing_offset == (vm_object_offset_t) -1) {
965 crypto_backing_offset = VME_OFFSET(&tmp_entry);
966 }
967
968 cache_pager = TRUE;
969 #if XNU_TARGET_OS_OSX
970 if (vm_map_is_alien(map)) {
971 cache_pager = FALSE;
972 }
973 #endif /* XNU_TARGET_OS_OSX */
974
975 /*
976 * Lookup (and create if necessary) the protected memory object
977 * matching that VM object.
978 * If successful, this also grabs a reference on the memory object,
979 * to guarantee that it doesn't go away before we get a chance to map
980 * it.
981 */
982 unprotected_mem_obj = apple_protect_pager_setup(
983 protected_object,
984 VME_OFFSET(&tmp_entry),
985 crypto_backing_offset,
986 crypt_info,
987 crypto_start,
988 crypto_end,
989 cache_pager);
990
991 /* release extra ref on protected object */
992 vm_object_deallocate(protected_object);
993
994 if (unprotected_mem_obj == NULL) {
995 kr = KERN_FAILURE;
996 goto done;
997 }
998
999 /* can overwrite an immutable mapping */
1000 vm_map_kernel_flags_t vmk_flags = {
1001 .vmf_fixed = true,
1002 .vmf_overwrite = true,
1003 .vmkf_overwrite_immutable = true,
1004 };
1005 #if __arm64__
1006 if (tmp_entry.used_for_jit &&
1007 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
1008 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
1009 fourk_binary_compatibility_unsafe &&
1010 fourk_binary_compatibility_allow_wx) {
1011 printf("** FOURK_COMPAT [%d]: "
1012 "allowing write+execute at 0x%llx\n",
1013 proc_selfpid(), tmp_entry.vme_start);
1014 vmk_flags.vmkf_map_jit = TRUE;
1015 }
1016 #endif /* __arm64__ */
1017
1018 /* map this memory object in place of the current one */
1019 map_addr = tmp_entry.vme_start;
1020 kr = vm_map_enter_mem_object(map,
1021 &map_addr,
1022 (tmp_entry.vme_end -
1023 tmp_entry.vme_start),
1024 (mach_vm_offset_t) 0,
1025 vmk_flags,
1026 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
1027 0,
1028 TRUE,
1029 tmp_entry.protection,
1030 tmp_entry.max_protection,
1031 tmp_entry.inheritance);
1032 assertf(kr == KERN_SUCCESS,
1033 "kr = 0x%x\n", kr);
1034 assertf(map_addr == tmp_entry.vme_start,
1035 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
1036 (uint64_t)map_addr,
1037 (uint64_t) tmp_entry.vme_start,
1038 &tmp_entry);
1039
1040 #if VM_MAP_DEBUG_APPLE_PROTECT
1041 if (vm_map_debug_apple_protect) {
1042 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
1043 " backing:[object:%p,offset:0x%llx,"
1044 "crypto_backing_offset:0x%llx,"
1045 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
1046 map,
1047 (uint64_t) map_addr,
1048 (uint64_t) (map_addr + (tmp_entry.vme_end -
1049 tmp_entry.vme_start)),
1050 unprotected_mem_obj,
1051 protected_object,
1052 VME_OFFSET(&tmp_entry),
1053 crypto_backing_offset,
1054 crypto_start,
1055 crypto_end);
1056 }
1057 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1058
1059 /*
1060 * Release the reference obtained by
1061 * apple_protect_pager_setup().
1062 * The mapping (if it succeeded) is now holding a reference on
1063 * the memory object.
1064 */
1065 memory_object_deallocate(unprotected_mem_obj);
1066 unprotected_mem_obj = MEMORY_OBJECT_NULL;
1067
1068 /* continue with next map entry */
1069 crypto_backing_offset += (tmp_entry.vme_end -
1070 tmp_entry.vme_start);
1071 crypto_backing_offset -= crypto_start;
1072 }
1073 kr = KERN_SUCCESS;
1074
1075 done:
1076 if (map_locked) {
1077 vm_map_unlock(map);
1078 }
1079 return kr;
1080 }
1081 #endif /* CONFIG_CODE_DECRYPTION */
1082
1083
1084 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1085 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1086 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1087
1088 #if XNU_TARGET_OS_OSX
1089 int malloc_no_cow = 0;
1090 #else /* XNU_TARGET_OS_OSX */
1091 int malloc_no_cow = 1;
1092 #endif /* XNU_TARGET_OS_OSX */
1093 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1094 #if DEBUG
1095 int vm_check_map_sanity = 0;
1096 #endif
1097
1098 /*
1099 * vm_map_init:
1100 *
1101 * Initialize the vm_map module. Must be called before
1102 * any other vm_map routines.
1103 *
1104 * Map and entry structures are allocated from zones -- we must
1105 * initialize those zones.
1106 *
1107 * There are three zones of interest:
1108 *
1109 * vm_map_zone: used to allocate maps.
1110 * vm_map_entry_zone: used to allocate map entries.
1111 *
1112 * LP32:
1113 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1114 *
1115 * The kernel allocates map entries from a special zone that is initially
1116 * "crammed" with memory. It would be difficult (perhaps impossible) for
1117 * the kernel to allocate more memory to an entry zone when it became
1118 * empty since the very act of allocating memory implies the creation
1119 * of a new entry.
1120 */
1121 __startup_func
1122 void
1123 vm_map_init(void)
1124 {
1125
1126 #if MACH_ASSERT
1127 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1128 sizeof(debug4k_filter));
1129 #endif /* MACH_ASSERT */
1130
1131 zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1132 VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1133
1134 /*
1135 * Don't quarantine because we always need elements available
1136 * Disallow GC on this zone... to aid the GC.
1137 */
1138 zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1139 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1140 ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1141 z->z_elems_rsv = (uint16_t)(32 *
1142 (ml_early_cpu_max_number() + 1));
1143 });
1144
1145 zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1146 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1147 ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1148 z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_outer_size(z));
1149 });
1150
1151 zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1152 ZC_NOENCRYPT, ZONE_ID_VM_MAP_COPY, NULL);
1153
1154 /*
1155 * Add the stolen memory to zones, adjust zone size and stolen counts.
1156 */
1157 zone_cram_early(vm_map_zone, map_data, map_data_size);
1158 zone_cram_early(vm_map_entry_zone, kentry_data, kentry_data_size);
1159 zone_cram_early(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1160 printf("VM bootstrap: %d maps, %d entries and %d holes available\n",
1161 zone_count_free(vm_map_zone),
1162 zone_count_free(vm_map_entry_zone),
1163 zone_count_free(vm_map_holes_zone));
1164
1165 /*
1166 * Since these are covered by zones, remove them from stolen page accounting.
1167 */
1168 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1169
1170 #if VM_MAP_DEBUG_APPLE_PROTECT
1171 PE_parse_boot_argn("vm_map_debug_apple_protect",
1172 &vm_map_debug_apple_protect,
1173 sizeof(vm_map_debug_apple_protect));
1174 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1175 #if VM_MAP_DEBUG_FOURK
1176 PE_parse_boot_argn("vm_map_debug_fourk",
1177 &vm_map_debug_fourk,
1178 sizeof(vm_map_debug_fourk));
1179 #endif /* VM_MAP_DEBUG_FOURK */
1180
1181 PE_parse_boot_argn("malloc_no_cow",
1182 &malloc_no_cow,
1183 sizeof(malloc_no_cow));
1184 if (malloc_no_cow) {
1185 vm_memory_malloc_no_cow_mask = 0ULL;
1186 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1187 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1188 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1189 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1190 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1191 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1192 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1193 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1194 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1195 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1196 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1197 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1198 &vm_memory_malloc_no_cow_mask,
1199 sizeof(vm_memory_malloc_no_cow_mask));
1200 }
1201
1202 #if CONFIG_MAP_RANGES
1203 vm_map_range_map_init();
1204 #endif /* CONFIG_MAP_RANGES */
1205
1206 #if DEBUG
1207 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1208 if (vm_check_map_sanity) {
1209 kprintf("VM sanity checking enabled\n");
1210 } else {
1211 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1212 }
1213 #endif /* DEBUG */
1214
1215 #if DEVELOPMENT || DEBUG
1216 PE_parse_boot_argn("panic_on_unsigned_execute",
1217 &panic_on_unsigned_execute,
1218 sizeof(panic_on_unsigned_execute));
1219 PE_parse_boot_argn("panic_on_mlock_failure",
1220 &panic_on_mlock_failure,
1221 sizeof(panic_on_mlock_failure));
1222 #endif /* DEVELOPMENT || DEBUG */
1223 }
1224
1225 __startup_func
1226 static void
1227 vm_map_steal_memory(void)
1228 {
1229 /*
1230 * We need to reserve enough memory to support bootstrapping VM maps
1231 * and the zone subsystem.
1232 *
1233 * The VM Maps that need to function before zones can support them
1234 * are the ones registered with vm_map_will_allocate_early_map(),
1235 * which are:
1236 * - the kernel map
1237 * - the various submaps used by zones (pgz, meta, ...)
1238 *
1239 * We also need enough entries and holes to support them
1240 * until zone_metadata_init() is called, which is when
1241 * the zone allocator becomes capable of expanding dynamically.
1242 *
1243 * We need:
1244 * - VM_MAP_EARLY_COUNT_MAX worth of VM Maps.
1245 * - To allow for 3-4 entries per map, but the kernel map
1246 * needs a multiple of VM_MAP_EARLY_COUNT_MAX entries
1247 * to describe the submaps, so double it (and make it 8x too)
1248 * - To allow for holes between entries,
1249 * hence needs the same budget as entries
1250 *    hence needs the same budget as entries
 */
1251 map_data_size = zone_get_early_alloc_size(VM_MAP_ZONE_NAME,
1252 sizeof(struct _vm_map), VM_MAP_ZFLAGS,
1253 VM_MAP_EARLY_COUNT_MAX);
1254
1255 kentry_data_size = zone_get_early_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
1256 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1257 8 * VM_MAP_EARLY_COUNT_MAX);
1258
1259 map_holes_data_size = zone_get_early_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1260 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1261 8 * VM_MAP_EARLY_COUNT_MAX);
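/*
 * Rough arithmetic for the early budget (assuming zone_get_early_alloc_size()
 * rounds each request up to whole pages): with VM_MAP_EARLY_COUNT_MAX == 16
 * this reserves room for 16 maps, 8 * 16 == 128 map entries and 128 hole
 * links, which is roughly what the "VM bootstrap" printf in vm_map_init()
 * later reports as available (modulo that rounding).
 */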
1262
1263 /*
1264 * Steal a contiguous range of memory so that a simple range check
1265 * can validate early addresses being freed/crammed to these
1266 * zones
1267 */
1268 map_data = zone_early_mem_init(map_data_size + kentry_data_size +
1269 map_holes_data_size);
1270 kentry_data = map_data + map_data_size;
1271 map_holes_data = kentry_data + kentry_data_size;
1272 }
1273 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1274
1275 __startup_func
1276 static void
1277 vm_kernel_boostraped(void)
1278 {
1279 zone_enable_caching(&zone_array[ZONE_ID_VM_MAP_ENTRY]);
1280 zone_enable_caching(&zone_array[ZONE_ID_VM_MAP_HOLES]);
1281 zone_enable_caching(&zone_array[ZONE_ID_VM_MAP_COPY]);
1282
1283 printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
1284 zone_count_free(vm_map_zone),
1285 zone_count_free(vm_map_entry_zone),
1286 zone_count_free(vm_map_holes_zone));
1287 }
1288 STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_boostraped);
1289
1290 void
1291 vm_map_disable_hole_optimization(vm_map_t map)
1292 {
1293 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1294
1295 if (map->holelistenabled) {
1296 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1297
1298 while (hole_entry != NULL) {
1299 next_hole_entry = hole_entry->vme_next;
1300
1301 hole_entry->vme_next = NULL;
1302 hole_entry->vme_prev = NULL;
1303 zfree_id(ZONE_ID_VM_MAP_HOLES, hole_entry);
1304
1305 if (next_hole_entry == head_entry) {
1306 hole_entry = NULL;
1307 } else {
1308 hole_entry = next_hole_entry;
1309 }
1310 }
1311
1312 map->holes_list = NULL;
1313 map->holelistenabled = FALSE;
1314
1315 map->first_free = vm_map_first_entry(map);
1316 SAVE_HINT_HOLE_WRITE(map, NULL);
1317 }
1318 }
1319
1320 boolean_t
1321 vm_kernel_map_is_kernel(vm_map_t map)
1322 {
1323 return map->pmap == kernel_pmap;
1324 }
1325
1326 /*
1327 * vm_map_create:
1328 *
1329 * Creates and returns a new empty VM map with
1330 * the given physical map structure, and having
1331 * the given lower and upper address bounds.
1332 */
1333
1334 extern vm_map_t vm_map_create_external(
1335 pmap_t pmap,
1336 vm_map_offset_t min_off,
1337 vm_map_offset_t max_off,
1338 boolean_t pageable);
1339
1340 vm_map_t
1341 vm_map_create_external(
1342 pmap_t pmap,
1343 vm_map_offset_t min,
1344 vm_map_offset_t max,
1345 boolean_t pageable)
1346 {
1347 vm_map_create_options_t options = VM_MAP_CREATE_DEFAULT;
1348
1349 if (pageable) {
1350 options |= VM_MAP_CREATE_PAGEABLE;
1351 }
1352 return vm_map_create_options(pmap, min, max, options);
1353 }
1354
1355 __startup_func
1356 void
1357 vm_map_will_allocate_early_map(vm_map_t *owner)
1358 {
1359 if (early_map_count >= VM_MAP_EARLY_COUNT_MAX) {
1360 panic("VM_MAP_EARLY_COUNT_MAX is too low");
1361 }
1362
1363 early_map_owners[early_map_count++] = owner;
1364 }
1365
1366 __startup_func
1367 void
1368 vm_map_relocate_early_maps(vm_offset_t delta)
1369 {
1370 for (uint32_t i = 0; i < early_map_count; i++) {
1371 vm_address_t addr = (vm_address_t)*early_map_owners[i];
1372
1373 *early_map_owners[i] = (vm_map_t)(addr + delta);
1374 }
1375
1376 early_map_count = ~0u;
1377 }
1378
1379 /*
1380 * Routine: vm_map_relocate_early_elem
1381 *
1382 * Purpose:
1383 * Early zone elements are allocated in a temporary part
1384 * of the address space.
1385 *
1386 * Once the zones live in their final place, the early
1387 * VM maps, map entries and map holes need to be relocated.
1388 *
1389 * It involves rewriting any vm_map_t, vm_map_entry_t or
1390 * pointers to vm_map_links. Other pointers to other types
1391 * are fine.
1392 *
1393 * Fortunately, pointers to those types are self-contained
1394 * in those zones, _except_ for pointers to VM maps,
1395 * which are tracked during early boot and fixed with
1396 * vm_map_relocate_early_maps().
1397 * vm_map_relocate_early_maps().
 */
1398 __startup_func
1399 void
1400 vm_map_relocate_early_elem(
1401 uint32_t zone_id,
1402 vm_offset_t new_addr,
1403 vm_offset_t delta)
1404 {
1405 #define relocate(type_t, field) ({ \
1406 typeof(((type_t)NULL)->field) *__field = &((type_t)new_addr)->field; \
1407 if (*__field) { \
1408 *__field = (typeof(*__field))((vm_offset_t)*__field + delta); \
1409 } \
1410 })
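/*
 * For example, relocate(vm_map_t, hint) expands to roughly:
 *
 *	vm_map_entry_t *__field = &((vm_map_t)new_addr)->hint;
 *	if (*__field) {
 *		*__field = (vm_map_entry_t)((vm_offset_t)*__field + delta);
 *	}
 *
 * i.e. each pointer field of the element, read at its new address, is slid
 * by "delta" only if it was non-NULL in the early allocation.
 */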
1411
1412 switch (zone_id) {
1413 case ZONE_ID_VM_MAP:
1414 case ZONE_ID_VM_MAP_ENTRY:
1415 case ZONE_ID_VM_MAP_HOLES:
1416 break;
1417
1418 default:
1419 panic("Unexpected zone ID %d", zone_id);
1420 }
1421
1422 if (zone_id == ZONE_ID_VM_MAP) {
1423 relocate(vm_map_t, hdr.links.prev);
1424 relocate(vm_map_t, hdr.links.next);
1425 ((vm_map_t)new_addr)->pmap = kernel_pmap;
1426 #ifdef VM_MAP_STORE_USE_RB
1427 relocate(vm_map_t, hdr.rb_head_store.rbh_root);
1428 #endif /* VM_MAP_STORE_USE_RB */
1429 relocate(vm_map_t, hint);
1430 relocate(vm_map_t, hole_hint);
1431 relocate(vm_map_t, first_free);
1432 return;
1433 }
1434
1435 relocate(struct vm_map_links *, prev);
1436 relocate(struct vm_map_links *, next);
1437
1438 if (zone_id == ZONE_ID_VM_MAP_ENTRY) {
1439 #ifdef VM_MAP_STORE_USE_RB
1440 relocate(vm_map_entry_t, store.entry.rbe_left);
1441 relocate(vm_map_entry_t, store.entry.rbe_right);
1442 relocate(vm_map_entry_t, store.entry.rbe_parent);
1443 #endif /* VM_MAP_STORE_USE_RB */
1444 if (((vm_map_entry_t)new_addr)->is_sub_map) {
1445 /* no object to relocate because we haven't made any */
1446 ((vm_map_entry_t)new_addr)->vme_submap +=
1447 delta >> VME_SUBMAP_SHIFT;
1448 }
1449 #if MAP_ENTRY_CREATION_DEBUG
1450 relocate(vm_map_entry_t, vme_creation_maphdr);
1451 #endif /* MAP_ENTRY_CREATION_DEBUG */
1452 }
1453
1454 #undef relocate
1455 }
1456
1457 vm_map_t
1458 vm_map_create_options(
1459 pmap_t pmap,
1460 vm_map_offset_t min,
1461 vm_map_offset_t max,
1462 vm_map_create_options_t options)
1463 {
1464 vm_map_t result;
1465
1466 #if DEBUG || DEVELOPMENT
1467 if (__improbable(startup_phase < STARTUP_SUB_ZALLOC)) {
1468 if (early_map_count != ~0u && early_map_count !=
1469 zone_count_allocated(vm_map_zone) + 1) {
1470 panic("allocating %dth early map, owner not known",
1471 zone_count_allocated(vm_map_zone) + 1);
1472 }
1473 if (early_map_count != ~0u && pmap && pmap != kernel_pmap) {
1474 panic("allocating %dth early map for non kernel pmap",
1475 early_map_count);
1476 }
1477 }
1478 #endif /* DEBUG || DEVELOPMENT */
1479
1480 result = zalloc_id(ZONE_ID_VM_MAP, Z_WAITOK | Z_NOFAIL | Z_ZERO);
1481
1482 vm_map_store_init(&result->hdr);
1483 result->hdr.entries_pageable = (bool)(options & VM_MAP_CREATE_PAGEABLE);
1484 vm_map_set_page_shift(result, PAGE_SHIFT);
1485
1486 result->size_limit = RLIM_INFINITY; /* default unlimited */
1487 result->data_limit = RLIM_INFINITY; /* default unlimited */
1488 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1489 os_ref_init_count_raw(&result->map_refcnt, &map_refgrp, 1);
1490 result->pmap = pmap;
1491 result->min_offset = min;
1492 result->max_offset = max;
1493 result->first_free = vm_map_to_entry(result);
1494 result->hint = vm_map_to_entry(result);
1495
1496 if (options & VM_MAP_CREATE_NEVER_FAULTS) {
1497 assert(pmap == kernel_pmap);
1498 result->never_faults = true;
1499 }
1500
1501 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1502 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1503 result->has_corpse_footprint = true;
1504 } else if (!(options & VM_MAP_CREATE_DISABLE_HOLELIST)) {
1505 struct vm_map_links *hole_entry;
1506
1507 hole_entry = zalloc_id(ZONE_ID_VM_MAP_HOLES, Z_WAITOK | Z_NOFAIL);
1508 hole_entry->start = min;
1509 #if defined(__arm64__)
1510 hole_entry->end = result->max_offset;
1511 #else
1512 hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
1513 #endif
1514 result->holes_list = result->hole_hint = hole_entry;
1515 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1516 result->holelistenabled = true;
1517 }
1518
1519 vm_map_lock_init(result);
1520
1521 return result;
1522 }
1523
1524 /*
1525 * Adjusts a submap that was made by kmem_suballoc()
1526 * before it knew where it would be mapped,
1527 * so that it has the right min/max offsets.
1528 *
1529 * We do not need to hold any locks:
1530 * only the caller knows about this map,
1531 * and it is not published on any entry yet.
1532 * and it is not published on any entry yet.
 */
1533 static void
1534 vm_map_adjust_offsets(
1535 vm_map_t map,
1536 vm_map_offset_t min_off,
1537 vm_map_offset_t max_off)
1538 {
1539 assert(map->min_offset == 0);
1540 assert(map->max_offset == max_off - min_off);
1541 assert(map->hdr.nentries == 0);
1542 assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1543
1544 map->min_offset = min_off;
1545 map->max_offset = max_off;
1546
1547 if (map->holelistenabled) {
1548 struct vm_map_links *hole = map->holes_list;
1549
1550 hole->start = min_off;
1551 #if defined(__arm64__)
1552 hole->end = max_off;
1553 #else
1554 hole->end = MAX(max_off, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
1555 #endif
1556 }
1557 }
1558
1559
1560 vm_map_size_t
1561 vm_map_adjusted_size(vm_map_t map)
1562 {
1563 const struct vm_reserved_region *regions = NULL;
1564 size_t num_regions = 0;
1565 mach_vm_size_t reserved_size = 0, map_size = 0;
1566
1567 if (map == NULL || (map->size == 0)) {
1568 return 0;
1569 }
1570
1571 map_size = map->size;
1572
1573 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1574 /*
1575 * No special reserved regions or not an exotic map or the task
1576 * is terminating and these special regions might have already
1577 * been deallocated.
1578 */
1579 return map_size;
1580 }
1581
1582 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1583 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1584
1585 while (num_regions) {
1586 reserved_size += regions[--num_regions].vmrr_size;
1587 }
1588
1589 /*
1590 * There are a few places where the map is being switched out due to
1591 * 'termination' without that bit being set (e.g. exec and corpse purging).
1592 * In those cases, we could have the map's regions being deallocated on
1593 * a core while some accounting process is trying to get the map's size.
1594 * So this assert can't be enabled till all those places are uniform in
1595 * their use of the 'map->terminated' bit.
1596 *
1597 * assert(map_size >= reserved_size);
1598 */
1599
1600 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1601 }
1602
1603 /*
1604 * vm_map_entry_create: [ internal use only ]
1605 *
1606 * Allocates a VM map entry for insertion in the
1607 * given map (or map copy). No fields are filled.
1608 *
1609 * The VM entry will be zero initialized, except for:
1610 * - behavior set to VM_BEHAVIOR_DEFAULT
1611 * - inheritance set to VM_INHERIT_DEFAULT
1612 * - inheritance set to VM_INHERIT_DEFAULT
 */
1613 #define vm_map_entry_create(map) _vm_map_entry_create(&(map)->hdr)
1614
1615 #define vm_map_copy_entry_create(copy) _vm_map_entry_create(&(copy)->cpy_hdr)
1616
1617 static vm_map_entry_t
1618 _vm_map_entry_create(
1619 struct vm_map_header *map_header __unused)
1620 {
1621 vm_map_entry_t entry = NULL;
1622
1623 entry = zalloc_id(ZONE_ID_VM_MAP_ENTRY, Z_WAITOK | Z_ZERO);
1624
1625 /*
1626 * Help the compiler with what we know to be true,
1627 * so that the further bitfields inits have good codegen.
1628 *
1629 * See rdar://87041299
1630 */
1631 __builtin_assume(entry->vme_object_value == 0);
1632 __builtin_assume(*(uint64_t *)(&entry->vme_object_value + 1) == 0);
1633 __builtin_assume(*(uint64_t *)(&entry->vme_object_value + 2) == 0);
1634
1635 static_assert(VM_MAX_TAG_VALUE <= VME_ALIAS_MASK,
1636 "VME_ALIAS_MASK covers tags");
1637
1638 static_assert(VM_BEHAVIOR_DEFAULT == 0,
1639 "can skip zeroing of the behavior field");
1640 entry->inheritance = VM_INHERIT_DEFAULT;
1641
1642 #if MAP_ENTRY_CREATION_DEBUG
1643 entry->vme_creation_maphdr = map_header;
1644 entry->vme_creation_bt = btref_get(__builtin_frame_address(0),
1645 BTREF_GET_NOWAIT);
1646 #endif
1647 return entry;
1648 }
1649
1650 /*
1651 * vm_map_entry_dispose: [ internal use only ]
1652 *
1653 * Inverse of vm_map_entry_create.
1654 *
1655 * write map lock held so no need to
1656 * do anything special to ensure correctness
1657 * of the stores
1658 */
1659 static void
1660 vm_map_entry_dispose(
1661 vm_map_entry_t entry)
1662 {
1663 #if MAP_ENTRY_CREATION_DEBUG
1664 btref_put(entry->vme_creation_bt);
1665 #endif
1666 #if MAP_ENTRY_INSERTION_DEBUG
1667 btref_put(entry->vme_insertion_bt);
1668 #endif
1669 zfree(vm_map_entry_zone, entry);
1670 }
1671
1672 #define vm_map_copy_entry_dispose(copy_entry) \
1673 vm_map_entry_dispose(copy_entry)
1674
1675 static vm_map_entry_t
1676 vm_map_zap_first_entry(
1677 vm_map_zap_t list)
1678 {
1679 return list->vmz_head;
1680 }
1681
1682 static vm_map_entry_t
1683 vm_map_zap_last_entry(
1684 vm_map_zap_t list)
1685 {
1686 assert(vm_map_zap_first_entry(list));
1687 return __container_of(list->vmz_tail, struct vm_map_entry, vme_next);
1688 }
1689
1690 static void
1691 vm_map_zap_append(
1692 vm_map_zap_t list,
1693 vm_map_entry_t entry)
1694 {
1695 entry->vme_next = VM_MAP_ENTRY_NULL;
1696 *list->vmz_tail = entry;
1697 list->vmz_tail = &entry->vme_next;
1698 }
1699
1700 static vm_map_entry_t
1701 vm_map_zap_pop(
1702 vm_map_zap_t list)
1703 {
1704 vm_map_entry_t head = list->vmz_head;
1705
1706 if (head != VM_MAP_ENTRY_NULL &&
1707 (list->vmz_head = head->vme_next) == VM_MAP_ENTRY_NULL) {
1708 list->vmz_tail = &list->vmz_head;
1709 }
1710
1711 return head;
1712 }
1713
1714 static void
1715 vm_map_zap_dispose(
1716 vm_map_zap_t list)
1717 {
1718 vm_map_entry_t entry;
1719
1720 while ((entry = vm_map_zap_pop(list))) {
1721 if (entry->is_sub_map) {
1722 vm_map_deallocate(VME_SUBMAP(entry));
1723 } else {
1724 vm_object_deallocate(VME_OBJECT(entry));
1725 }
1726
1727 vm_map_entry_dispose(entry);
1728 }
1729 }
1730
1731 #if MACH_ASSERT
1732 static boolean_t first_free_check = FALSE;
1733 boolean_t
1734 first_free_is_valid(
1735 vm_map_t map)
1736 {
1737 if (!first_free_check) {
1738 return TRUE;
1739 }
1740
1741 return first_free_is_valid_store( map );
1742 }
1743 #endif /* MACH_ASSERT */
1744
1745
1746 #define vm_map_copy_entry_link(copy, after_where, entry) \
1747 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1748
1749 #define vm_map_copy_entry_unlink(copy, entry) \
1750 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry), false)
1751
1752 /*
1753 * vm_map_destroy:
1754 *
1755 * Actually destroy a map.
1756 */
1757 void
1758 vm_map_destroy(
1759 vm_map_t map)
1760 {
1761 /* final cleanup: this is not allowed to fail */
1762 vmr_flags_t flags = VM_MAP_REMOVE_NO_FLAGS;
1763
1764 VM_MAP_ZAP_DECLARE(zap);
1765
1766 vm_map_lock(map);
1767
1768 map->terminated = true;
1769 /* clean up regular map entries */
1770 (void)vm_map_delete(map, map->min_offset, map->max_offset, flags,
1771 KMEM_GUARD_NONE, &zap);
1772 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1773 (void)vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, flags,
1774 KMEM_GUARD_NONE, &zap);
1775
1776 vm_map_disable_hole_optimization(map);
1777 vm_map_corpse_footprint_destroy(map);
1778
1779 vm_map_unlock(map);
1780
1781 vm_map_zap_dispose(&zap);
1782
1783 assert(map->hdr.nentries == 0);
1784
1785 if (map->pmap) {
1786 pmap_destroy(map->pmap);
1787 }
1788
1789 lck_rw_destroy(&map->lock, &vm_map_lck_grp);
1790
1791 zfree_id(ZONE_ID_VM_MAP, map);
1792 }
1793
1794 /*
1795 * Returns pid of the task with the largest number of VM map entries.
1796 * Used in the zone-map-exhaustion jetsam path.
1797 */
1798 pid_t
1799 find_largest_process_vm_map_entries(void)
1800 {
1801 pid_t victim_pid = -1;
1802 int max_vm_map_entries = 0;
1803 task_t task = TASK_NULL;
1804 queue_head_t *task_list = &tasks;
1805
1806 lck_mtx_lock(&tasks_threads_lock);
1807 queue_iterate(task_list, task, task_t, tasks) {
1808 if (task == kernel_task || !task->active) {
1809 continue;
1810 }
1811
1812 vm_map_t task_map = task->map;
1813 if (task_map != VM_MAP_NULL) {
1814 int task_vm_map_entries = task_map->hdr.nentries;
1815 if (task_vm_map_entries > max_vm_map_entries) {
1816 max_vm_map_entries = task_vm_map_entries;
1817 victim_pid = pid_from_task(task);
1818 }
1819 }
1820 }
1821 lck_mtx_unlock(&tasks_threads_lock);
1822
1823 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1824 return victim_pid;
1825 }
1826
1827
1828 /*
1829 * vm_map_lookup_entry: [ internal use only ]
1830 *
1831 * Calls into the vm map store layer to find the map
1832 * entry containing (or immediately preceding) the
1833 * specified address in the given map; the entry is returned
1834 * in the "entry" parameter. The boolean
1835 * result indicates whether the address is
1836 * actually contained in the map.
1837 */
1838 boolean_t
1839 vm_map_lookup_entry(
1840 vm_map_t map,
1841 vm_map_offset_t address,
1842 vm_map_entry_t *entry) /* OUT */
1843 {
1844 #if CONFIG_KERNEL_TBI
1845 if (VM_KERNEL_ADDRESS(address)) {
1846 address = VM_KERNEL_STRIP_UPTR(address);
1847 }
1848 #endif /* CONFIG_KERNEL_TBI */
1849 #if CONFIG_PROB_GZALLOC
1850 if (map->pmap == kernel_pmap) {
1851 assertf(!pgz_owned(address),
1852 "it is the responsibility of callers to unguard PGZ addresses");
1853 }
1854 #endif /* CONFIG_PROB_GZALLOC */
1855 return vm_map_store_lookup_entry( map, address, entry );
1856 }
1857
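/*
 * Caller-side sketch for vm_map_lookup_entry() (illustrative only; callers
 * are expected to hold the map lock, and "addr" is a placeholder):
 *
 *	vm_map_entry_t entry;
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		addr lies within [entry->vme_start, entry->vme_end)
 *	} else {
 *		"entry" precedes addr; entry->vme_next is the next entry above it
 *	}
 *
 * vm_map_lookup_entry_or_next() below performs the same lookup, but on a
 * miss it returns the entry immediately *after* the address instead of the
 * one before it.
 */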
1858 boolean_t
1859 vm_map_lookup_entry_or_next(
1860 vm_map_t map,
1861 vm_map_offset_t address,
1862 vm_map_entry_t *entry) /* OUT */
1863 {
1864 if (vm_map_lookup_entry(map, address, entry)) {
1865 return true;
1866 }
1867
1868 *entry = (*entry)->vme_next;
1869 return false;
1870 }
1871
1872 #if CONFIG_PROB_GZALLOC
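/*
 * Same lookup as vm_map_lookup_entry(), but without the PGZ ownership
 * assertion, for callers that may legitimately pass a PGZ-guarded address.
 */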
1873 boolean_t
1874 vm_map_lookup_entry_allow_pgz(
1875 vm_map_t map,
1876 vm_map_offset_t address,
1877 vm_map_entry_t *entry) /* OUT */
1878 {
1879 #if CONFIG_KERNEL_TBI
1880 if (VM_KERNEL_ADDRESS(address)) {
1881 address = VM_KERNEL_STRIP_UPTR(address);
1882 }
1883 #endif /* CONFIG_KERNEL_TBI */
1884 return vm_map_store_lookup_entry( map, address, entry );
1885 }
1886 #endif /* CONFIG_PROB_GZALLOC */
1887
1888 /*
1889 * Routine: vm_map_range_invalid_panic
1890 * Purpose:
1891 * Panic on detection of an invalid range id.
1892 */
1893 __abortlike
1894 static void
1895 vm_map_range_invalid_panic(
1896 vm_map_t map,
1897 vm_map_range_id_t range_id)
1898 {
1899 panic("invalid range ID (%u) for map %p", range_id, map);
1900 }
1901
1902 /*
1903 * Routine: vm_map_get_range
1904 * Purpose:
1905 * Adjust bounds based on security policy.
1906 */
1907 static struct mach_vm_range
1908 vm_map_get_range(
1909 vm_map_t map,
1910 vm_map_address_t *address,
1911 vm_map_kernel_flags_t *vmk_flags,
1912 vm_map_size_t size,
1913 bool *is_ptr)
1914 {
1915 struct mach_vm_range effective_range = {};
1916 vm_map_range_id_t range_id = vmk_flags->vmkf_range_id;
1917
1918 if (map == kernel_map) {
1919 effective_range = kmem_ranges[range_id];
1920
1921 if (startup_phase >= STARTUP_SUB_KMEM) {
1922 /*
1923 * Hint provided by caller is zeroed as the range is restricted to a
1924 * subset of the entire kernel_map VA, which could put the hint outside
1925 * the range, causing vm_map_store_find_space to fail.
1926 */
1927 *address = 0ull;
1928 /*
1929 * Ensure that range_id passed in by the caller is within meaningful
1930 * bounds. Range id of KMEM_RANGE_ID_NONE will cause vm_map_locate_space
1931 * to fail as the corresponding range is invalid. Range id larger than
1932 * KMEM_RANGE_ID_MAX will lead to an OOB access.
1933 */
1934 if ((range_id == KMEM_RANGE_ID_NONE) ||
1935 (range_id > KMEM_RANGE_ID_MAX)) {
1936 vm_map_range_invalid_panic(map, range_id);
1937 }
1938
1939 /*
1940 * Pointer ranges use kmem_locate_space to do allocations.
1941 *
1942 * Non pointer fronts look like [ Small | Large | Permanent ]
1943 * Adjust range for allocations larger than KMEM_SMALLMAP_THRESHOLD.
1944 * Allocations smaller than KMEM_SMALLMAP_THRESHOLD are allowed to
1945 * use the entire range.
1946 */
1947 if (range_id < KMEM_RANGE_ID_SPRAYQTN) {
1948 *is_ptr = true;
1949 } else if (size >= KMEM_SMALLMAP_THRESHOLD) {
1950 effective_range = kmem_large_ranges[range_id];
1951 }
1952 }
1953 #if CONFIG_MAP_RANGES
1954 } else if (map->uses_user_ranges) {
1955 if (range_id > UMEM_RANGE_ID_MAX) {
1956 vm_map_range_invalid_panic(map, range_id);
1957 }
1958
1959 effective_range = map->user_range[range_id];
1960 #endif /* CONFIG_MAP_RANGES */
1961 } else {
1962 /*
1963 * If minimum is 0, bump it up by PAGE_SIZE. We want to limit
1964 * allocations of PAGEZERO to explicit requests since its
1965 * normal use is to catch dereferences of NULL and many
1966 * applications also treat pointers with a value of 0 as
1967 * special; suddenly having address 0 contain usable
1968 * memory would tend to confuse those applications.
1969 */
1970 effective_range.min_address = MAX(map->min_offset, VM_MAP_PAGE_SIZE(map));
1971 effective_range.max_address = map->max_offset;
1972 }
1973
1974 return effective_range;
1975 }
1976
1977 /*
1978 * Routine: vm_map_locate_space
1979 * Purpose:
1980 * Finds a range in the specified virtual address map,
1981 * returning the start of that range,
1982 * as well as the entry right before it.
1983 */
1984 kern_return_t
1985 vm_map_locate_space(
1986 vm_map_t map,
1987 vm_map_size_t size,
1988 vm_map_offset_t mask,
1989 vm_map_kernel_flags_t vmk_flags,
1990 vm_map_offset_t *start_inout,
1991 vm_map_entry_t *entry_out)
1992 {
1993 struct mach_vm_range effective_range = {};
1994 vm_map_size_t guard_offset;
1995 vm_map_offset_t hint, limit;
1996 vm_map_entry_t entry;
1997 bool is_kmem_ptr_range = false;
1998
1999 /*
2000 * Only supported by vm_map_enter() with a fixed address.
2001 */
2002 assert(!vmk_flags.vmkf_beyond_max);
2003
2004 if (__improbable(map->wait_for_space)) {
2005 /*
2006 * support for "wait_for_space" is minimal;
2007 * its only consumer is the ipc_kernel_copy_map.
2008 */
2009 assert(!map->holelistenabled &&
2010 !vmk_flags.vmkf_last_free &&
2011 !vmk_flags.vmkf_keep_map_locked &&
2012 !vmk_flags.vmkf_map_jit &&
2013 !vmk_flags.vmf_random_addr &&
2014 *start_inout <= map->min_offset);
2015 } else if (vmk_flags.vmkf_last_free) {
2016 assert(!vmk_flags.vmkf_map_jit &&
2017 !vmk_flags.vmf_random_addr);
2018 }
2019
2020 if (vmk_flags.vmkf_guard_before) {
2021 guard_offset = VM_MAP_PAGE_SIZE(map);
2022 assert(size > guard_offset);
2023 size -= guard_offset;
2024 } else {
2025 assert(size != 0);
2026 guard_offset = 0;
2027 }
2028
2029 /*
2030 * Validate range_id from flags and get associated range
2031 */
2032 effective_range = vm_map_get_range(map, start_inout, &vmk_flags, size,
2033 &is_kmem_ptr_range);
2034
2035 if (is_kmem_ptr_range) {
2036 return kmem_locate_space(size + guard_offset, vmk_flags.vmkf_range_id,
2037 vmk_flags.vmkf_last_free, start_inout, entry_out);
2038 }
2039
2040 #if XNU_TARGET_OS_OSX
2041 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2042 assert(map != kernel_map);
2043 effective_range.max_address = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2044 }
2045 #endif /* XNU_TARGET_OS_OSX */
2046
2047 again:
2048 if (vmk_flags.vmkf_last_free) {
2049 hint = *start_inout;
2050
2051 if (hint == 0 || hint > effective_range.max_address) {
2052 hint = effective_range.max_address;
2053 }
2054 if (hint <= effective_range.min_address) {
2055 return KERN_NO_SPACE;
2056 }
2057 limit = effective_range.min_address;
2058 } else {
2059 hint = *start_inout;
2060
2061 if (vmk_flags.vmkf_map_jit) {
2062 if (map->jit_entry_exists &&
2063 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2064 return KERN_INVALID_ARGUMENT;
2065 }
2066 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2067 vmk_flags.vmf_random_addr = true;
2068 }
2069 }
2070
2071 if (vmk_flags.vmf_random_addr) {
2072 kern_return_t kr;
2073
2074 kr = vm_map_random_address_for_size(map, &hint, size, vmk_flags);
2075 if (kr != KERN_SUCCESS) {
2076 return kr;
2077 }
2078 }
2079 #if XNU_TARGET_OS_OSX
2080 else if ((hint == 0 || hint == vm_map_min(map)) &&
2081 !map->disable_vmentry_reuse &&
2082 map->vmmap_high_start != 0) {
2083 hint = map->vmmap_high_start;
2084 }
2085 #endif /* XNU_TARGET_OS_OSX */
2086
2087 if (hint < effective_range.min_address) {
2088 hint = effective_range.min_address;
2089 }
2090 if (effective_range.max_address <= hint) {
2091 return KERN_NO_SPACE;
2092 }
2093
2094 limit = effective_range.max_address;
2095 }
2096 entry = vm_map_store_find_space(map,
2097 hint, limit, vmk_flags.vmkf_last_free,
2098 guard_offset, size, mask,
2099 start_inout);
2100
2101 if (__improbable(entry == NULL)) {
2102 if (map->wait_for_space &&
2103 guard_offset + size <=
2104 effective_range.max_address - effective_range.min_address) {
2105 assert_wait((event_t)map, THREAD_ABORTSAFE);
2106 vm_map_unlock(map);
2107 thread_block(THREAD_CONTINUE_NULL);
2108 vm_map_lock(map);
2109 goto again;
2110 }
2111 return KERN_NO_SPACE;
2112 }
2113
2114 if (entry_out) {
2115 *entry_out = entry;
2116 }
2117 return KERN_SUCCESS;
2118 }
2119
2120
2121 /*
2122 * Routine: vm_map_find_space
2123 * Purpose:
2124 * Allocate a range in the specified virtual address map,
2125 * returning the entry allocated for that range.
2126 * Used by kmem_alloc, etc.
2127 *
2128 * The map must NOT be locked on entry. It will be returned locked
2129 * on KERN_SUCCESS, unlocked on failure.
2130 *
2131 * If an entry is allocated, the object/offset fields
2132 * are initialized to zero.
2133 */
2134 kern_return_t
2135 vm_map_find_space(
2136 vm_map_t map,
2137 vm_map_offset_t hint_address,
2138 vm_map_size_t size,
2139 vm_map_offset_t mask,
2140 vm_map_kernel_flags_t vmk_flags,
2141 vm_map_entry_t *o_entry) /* OUT */
2142 {
2143 vm_map_entry_t new_entry, entry;
2144 kern_return_t kr;
2145
2146 if (size == 0) {
2147 return KERN_INVALID_ARGUMENT;
2148 }
2149
2150 new_entry = vm_map_entry_create(map);
2151 new_entry->use_pmap = true;
2152 new_entry->protection = VM_PROT_DEFAULT;
2153 new_entry->max_protection = VM_PROT_ALL;
2154
2155 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
2156 new_entry->map_aligned = true;
2157 }
2158 if (vmk_flags.vmf_permanent) {
2159 new_entry->vme_permanent = true;
2160 }
2161
2162 vm_map_lock(map);
2163
2164 kr = vm_map_locate_space(map, size, mask, vmk_flags,
2165 &hint_address, &entry);
2166 if (kr != KERN_SUCCESS) {
2167 vm_map_unlock(map);
2168 vm_map_entry_dispose(new_entry);
2169 return kr;
2170 }
2171 new_entry->vme_start = hint_address;
2172 new_entry->vme_end = hint_address + size;
2173
2174 /*
2175 * At this point,
2176 *
2177 * - new_entry's "vme_start" and "vme_end" should define
2178 * the endpoints of the available new range,
2179 *
2180 * - and "entry" should refer to the region before
2181 * the new range,
2182 *
2183 * - and the map should still be locked.
2184 */
2185
2186 assert(page_aligned(new_entry->vme_start));
2187 assert(page_aligned(new_entry->vme_end));
2188 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, VM_MAP_PAGE_MASK(map)));
2189 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, VM_MAP_PAGE_MASK(map)));
2190
2191 /*
2192 * Insert the new entry into the list
2193 */
2194
2195 vm_map_store_entry_link(map, entry, new_entry,
2196 VM_MAP_KERNEL_FLAGS_NONE);
2197 map->size += size;
2198
2199 /*
2200 * Update the lookup hint
2201 */
2202 SAVE_HINT_MAP_WRITE(map, new_entry);
2203
2204 *o_entry = new_entry;
2205 return KERN_SUCCESS;
2206 }
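
/*
 * Caller-side sketch of the vm_map_find_space() locking contract described
 * above (illustrative only; "hint", "size" and "vmk_flags" are placeholders):
 *
 *	vm_map_entry_t entry;
 *	kern_return_t kr;
 *
 *	kr = vm_map_find_space(map, hint, size, 0, vmk_flags, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		the map is returned locked; "entry" covers [vme_start, vme_end)
 *		...
 *		vm_map_unlock(map);
 *	}
 *	on failure the map is left unlocked
 */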
2207
2208 int vm_map_pmap_enter_print = FALSE;
2209 int vm_map_pmap_enter_enable = FALSE;
2210
2211 /*
2212 * Routine: vm_map_pmap_enter [internal only]
2213 *
2214 * Description:
2215 * Force pages from the specified object to be entered into
2216 * the pmap at the specified address if they are present.
2217 * As soon as a page is not found in the object, the scan ends.
2218 *
2219 * Returns:
2220 * Nothing.
2221 *
2222 * In/out conditions:
2223 * The source map should not be locked on entry.
2224 */
2225 __unused static void
2226 vm_map_pmap_enter(
2227 vm_map_t map,
2228 vm_map_offset_t addr,
2229 vm_map_offset_t end_addr,
2230 vm_object_t object,
2231 vm_object_offset_t offset,
2232 vm_prot_t protection)
2233 {
2234 int type_of_fault;
2235 kern_return_t kr;
2236 struct vm_object_fault_info fault_info = {};
2237
2238 if (map->pmap == 0) {
2239 return;
2240 }
2241
2242 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2243
2244 while (addr < end_addr) {
2245 vm_page_t m;
2246
2247
2248 /*
2249 * TODO:
2250 * From vm_map_enter(), we come into this function without the map
2251 * lock held or the object lock held.
2252 * We haven't taken a reference on the object either.
2253 * We should do a proper lookup on the map to make sure
2254 * that things are sane before we go locking objects that
2255 * could have been deallocated from under us.
2256 */
2257
2258 vm_object_lock(object);
2259
2260 m = vm_page_lookup(object, offset);
2261
2262 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2263 (m->vmp_unusual && (VMP_ERROR_GET(m) || m->vmp_restart || m->vmp_absent))) {
2264 vm_object_unlock(object);
2265 return;
2266 }
2267
2268 if (vm_map_pmap_enter_print) {
2269 printf("vm_map_pmap_enter:");
2270 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2271 map, (unsigned long long)addr, object, (unsigned long long)offset);
2272 }
2273 type_of_fault = DBG_CACHE_HIT_FAULT;
2274 kr = vm_fault_enter(m, map->pmap,
2275 addr,
2276 PAGE_SIZE, 0,
2277 protection, protection,
2278 VM_PAGE_WIRED(m),
2279 FALSE, /* change_wiring */
2280 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2281 &fault_info,
2282 NULL, /* need_retry */
2283 &type_of_fault);
2284
2285 vm_object_unlock(object);
2286
2287 offset += PAGE_SIZE_64;
2288 addr += PAGE_SIZE;
2289 }
2290 }
2291
2292 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2293 static kern_return_t
2294 vm_map_random_address_for_size(
2295 vm_map_t map,
2296 vm_map_offset_t *address,
2297 vm_map_size_t size,
2298 vm_map_kernel_flags_t vmk_flags)
2299 {
2300 kern_return_t kr = KERN_SUCCESS;
2301 int tries = 0;
2302 vm_map_offset_t random_addr = 0;
2303 vm_map_offset_t hole_end;
2304
2305 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2306 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2307 vm_map_size_t vm_hole_size = 0;
2308 vm_map_size_t addr_space_size;
2309 bool is_kmem_ptr;
2310 struct mach_vm_range effective_range;
2311
2312 effective_range = vm_map_get_range(map, address, &vmk_flags, size,
2313 &is_kmem_ptr);
2314
2315 addr_space_size = effective_range.max_address - effective_range.min_address;
2316 if (size >= addr_space_size) {
2317 return KERN_NO_SPACE;
2318 }
2319 addr_space_size -= size;
2320
2321 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2322
2323 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2324 if (startup_phase < STARTUP_SUB_ZALLOC) {
2325 random_addr = (vm_map_offset_t)early_random();
2326 } else {
2327 random_addr = (vm_map_offset_t)random();
2328 }
2329 random_addr <<= VM_MAP_PAGE_SHIFT(map);
2330 random_addr = vm_map_trunc_page(
2331 effective_range.min_address + (random_addr % addr_space_size),
2332 VM_MAP_PAGE_MASK(map));
2333
2334 #if CONFIG_PROB_GZALLOC
2335 if (map->pmap == kernel_pmap && pgz_owned(random_addr)) {
2336 continue;
2337 }
2338 #endif /* CONFIG_PROB_GZALLOC */
2339
2340 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2341 if (prev_entry == vm_map_to_entry(map)) {
2342 next_entry = vm_map_first_entry(map);
2343 } else {
2344 next_entry = prev_entry->vme_next;
2345 }
2346 if (next_entry == vm_map_to_entry(map)) {
2347 hole_end = vm_map_max(map);
2348 } else {
2349 hole_end = next_entry->vme_start;
2350 }
2351 vm_hole_size = hole_end - random_addr;
2352 if (vm_hole_size >= size) {
2353 *address = random_addr;
2354 break;
2355 }
2356 }
2357 tries++;
2358 }
2359
2360 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2361 kr = KERN_NO_SPACE;
2362 }
2363 return kr;
2364 }
2365
2366 static boolean_t
2367 vm_memory_malloc_no_cow(
2368 int alias)
2369 {
2370 uint64_t alias_mask;
2371
2372 if (alias > 63) {
2373 return FALSE;
2374 }
2375
2376 alias_mask = 1ULL << alias;
2377 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2378 return TRUE;
2379 }
2380 return FALSE;
2381 }
2382
2383 uint64_t vm_map_enter_RLIMIT_AS_count = 0;
2384 uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
2385 /*
2386 * Routine: vm_map_enter
2387 *
2388 * Description:
2389 * Allocate a range in the specified virtual address map.
2390 * The resulting range will refer to memory defined by
2391 * the given memory object and offset into that object.
2392 *
2393 * Arguments are as defined in the vm_map call.
2394 */
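/*
 * Illustrative call shape for an anonymous, pageable "anywhere" allocation
 * (a condensed sketch under assumed flags, not a literal call site; "addr",
 * "size" and "vmk_flags" are placeholders, with vmk_flags.vmf_fixed clear):
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter(map, &addr, size, 0, vmk_flags,
 *	    VM_OBJECT_NULL, 0, FALSE,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */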
2395 static unsigned int vm_map_enter_restore_successes = 0;
2396 static unsigned int vm_map_enter_restore_failures = 0;
2397 kern_return_t
2398 vm_map_enter(
2399 vm_map_t map,
2400 vm_map_offset_t *address, /* IN/OUT */
2401 vm_map_size_t size,
2402 vm_map_offset_t mask,
2403 vm_map_kernel_flags_t vmk_flags,
2404 vm_object_t object,
2405 vm_object_offset_t offset,
2406 boolean_t needs_copy,
2407 vm_prot_t cur_protection,
2408 vm_prot_t max_protection,
2409 vm_inherit_t inheritance)
2410 {
2411 vm_map_entry_t entry, new_entry;
2412 vm_map_offset_t start, tmp_start, tmp_offset;
2413 vm_map_offset_t end, tmp_end;
2414 vm_map_offset_t tmp2_start, tmp2_end;
2415 vm_map_offset_t step;
2416 kern_return_t result = KERN_SUCCESS;
2417 bool map_locked = FALSE;
2418 bool pmap_empty = TRUE;
2419 bool new_mapping_established = FALSE;
2420 const bool keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2421 const bool anywhere = !vmk_flags.vmf_fixed;
2422 const bool purgable = vmk_flags.vmf_purgeable;
2423 const bool overwrite = vmk_flags.vmf_overwrite;
2424 const bool no_cache = vmk_flags.vmf_no_cache;
2425 const bool is_submap = vmk_flags.vmkf_submap;
2426 const bool permanent = vmk_flags.vmf_permanent;
2427 const bool no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2428 const bool entry_for_jit = vmk_flags.vmkf_map_jit;
2429 const bool iokit_acct = vmk_flags.vmkf_iokit_acct;
2430 const bool resilient_codesign = vmk_flags.vmf_resilient_codesign;
2431 const bool resilient_media = vmk_flags.vmf_resilient_media;
2432 const bool entry_for_tpro = vmk_flags.vmf_tpro;
2433 const unsigned int superpage_size = vmk_flags.vmf_superpage_size;
2434 const vm_tag_t alias = vmk_flags.vm_tag;
2435 vm_tag_t user_alias;
2436 kern_return_t kr;
2437 bool clear_map_aligned = FALSE;
2438 vm_map_size_t chunk_size = 0;
2439 vm_object_t caller_object;
2440 VM_MAP_ZAP_DECLARE(zap_old_list);
2441 VM_MAP_ZAP_DECLARE(zap_new_list);
2442
2443 caller_object = object;
2444
2445 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2446
2447 if (vmk_flags.vmf_4gb_chunk) {
2448 #if defined(__LP64__)
2449 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2450 #else /* __LP64__ */
2451 chunk_size = ANON_CHUNK_SIZE;
2452 #endif /* __LP64__ */
2453 } else {
2454 chunk_size = ANON_CHUNK_SIZE;
2455 }
2456
2457
2458
2459 if (superpage_size) {
2460 switch (superpage_size) {
2461 /*
2462 * Note that the current implementation only supports
2463 * a single size for superpages, SUPERPAGE_SIZE, per
2464 * architecture. As soon as more sizes are to be
2465 * supported, SUPERPAGE_SIZE has to be replaced
2466 * with a lookup of the size depending on superpage_size.
2467 */
2468 #ifdef __x86_64__
2469 case SUPERPAGE_SIZE_ANY:
2470 /* handle it like 2 MB and round up to page size */
2471 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2472 OS_FALLTHROUGH;
2473 case SUPERPAGE_SIZE_2MB:
2474 break;
2475 #endif
2476 default:
2477 return KERN_INVALID_ARGUMENT;
2478 }
2479 mask = SUPERPAGE_SIZE - 1;
2480 if (size & (SUPERPAGE_SIZE - 1)) {
2481 return KERN_INVALID_ARGUMENT;
2482 }
2483 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2484 }
2485
2486
2487 if ((cur_protection & VM_PROT_WRITE) &&
2488 (cur_protection & VM_PROT_EXECUTE) &&
2489 #if XNU_TARGET_OS_OSX
2490 map->pmap != kernel_pmap &&
2491 (cs_process_global_enforcement() ||
2492 (vmk_flags.vmkf_cs_enforcement_override
2493 ? vmk_flags.vmkf_cs_enforcement
2494 : (vm_map_cs_enforcement(map)
2495 #if __arm64__
2496 || !VM_MAP_IS_EXOTIC(map)
2497 #endif /* __arm64__ */
2498 ))) &&
2499 #endif /* XNU_TARGET_OS_OSX */
2500 #if CODE_SIGNING_MONITOR
2501 (csm_address_space_exempt(map->pmap) != KERN_SUCCESS) &&
2502 #endif
2503 (VM_MAP_POLICY_WX_FAIL(map) ||
2504 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2505 !entry_for_jit) {
2506 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2507
2508 DTRACE_VM3(cs_wx,
2509 uint64_t, 0,
2510 uint64_t, 0,
2511 vm_prot_t, cur_protection);
2512 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2513 proc_selfpid(),
2514 (get_bsdtask_info(current_task())
2515 ? proc_name_address(get_bsdtask_info(current_task()))
2516 : "?"),
2517 __FUNCTION__,
2518 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2519 cur_protection &= ~VM_PROT_EXECUTE;
2520 if (vm_protect_wx_fail) {
2521 return KERN_PROTECTION_FAILURE;
2522 }
2523 }
2524
2525 /*
2526 * If the task has requested executable lockdown,
2527 * deny any new executable mapping.
2528 */
2529 if (map->map_disallow_new_exec == TRUE) {
2530 if (cur_protection & VM_PROT_EXECUTE) {
2531 return KERN_PROTECTION_FAILURE;
2532 }
2533 }
2534
2535 if (resilient_codesign) {
2536 assert(!is_submap);
2537 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2538 if ((cur_protection | max_protection) & reject_prot) {
2539 return KERN_PROTECTION_FAILURE;
2540 }
2541 }
2542
2543 if (resilient_media) {
2544 assert(!is_submap);
2545 // assert(!needs_copy);
2546 if (object != VM_OBJECT_NULL &&
2547 !object->internal) {
2548 /*
2549 * This mapping is directly backed by an external
2550 * memory manager (e.g. a vnode pager for a file):
2551 * we would not have any safe place to inject
2552 * a zero-filled page if an actual page is not
2553 * available, without possibly impacting the actual
2554 * contents of the mapped object (e.g. the file),
2555 * so we can't provide any media resiliency here.
2556 */
2557 return KERN_INVALID_ARGUMENT;
2558 }
2559 }
2560
2561 if (is_submap) {
2562 vm_map_t submap;
2563 if (purgable) {
2564 /* submaps can not be purgeable */
2565 return KERN_INVALID_ARGUMENT;
2566 }
2567 if (object == VM_OBJECT_NULL) {
2568 /* submaps can not be created lazily */
2569 return KERN_INVALID_ARGUMENT;
2570 }
2571 submap = (vm_map_t) object;
2572 if (VM_MAP_PAGE_SHIFT(submap) != VM_MAP_PAGE_SHIFT(map)) {
2573 /* page size mismatch */
2574 return KERN_INVALID_ARGUMENT;
2575 }
2576 }
2577 if (vmk_flags.vmkf_already) {
2578 /*
2579 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2580 * is already present. For it to be meaningful, the requested
2581 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2582 * we shouldn't try to remove what was mapped there first
2583 * (!VM_FLAGS_OVERWRITE).
2584 */
2585 if (!vmk_flags.vmf_fixed || vmk_flags.vmf_overwrite) {
2586 return KERN_INVALID_ARGUMENT;
2587 }
2588 }
2589
2590 if (size == 0 ||
2591 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2592 *address = 0;
2593 return KERN_INVALID_ARGUMENT;
2594 }
2595
2596 if (map->pmap == kernel_pmap) {
2597 user_alias = VM_KERN_MEMORY_NONE;
2598 } else {
2599 user_alias = alias;
2600 }
2601
2602 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2603 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2604 }
2605
2606 #define RETURN(value) { result = value; goto BailOut; }
2607
2608 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2609 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2610 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2611 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2612 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2613 }
2614
2615 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2616 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2617 /*
2618 * In most cases, the caller rounds the size up to the
2619 * map's page size.
2620 * If we get a size that is explicitly not map-aligned here,
2621 * we'll have to respect the caller's wish and mark the
2622 * mapping as "not map-aligned" to avoid tripping the
2623 * map alignment checks later.
2624 */
2625 clear_map_aligned = TRUE;
2626 }
2627 if (!anywhere &&
2628 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2629 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2630 /*
2631 * We've been asked to map at a fixed address and that
2632 * address is not aligned to the map's specific alignment.
2633 * The caller should know what it's doing (i.e. most likely
2634 * mapping some fragmented copy map, transferring memory from
2635 * a VM map with a different alignment), so clear map_aligned
2636 * for this new VM map entry and proceed.
2637 */
2638 clear_map_aligned = TRUE;
2639 }
2640
2641 /*
2642 * Only zero-fill objects are allowed to be purgable.
2643 * LP64todo - limit purgable objects to 32-bits for now
2644 */
2645 if (purgable &&
2646 (offset != 0 ||
2647 (object != VM_OBJECT_NULL &&
2648 (object->vo_size != size ||
2649 object->purgable == VM_PURGABLE_DENY))
2650 #if __LP64__
2651 || size > ANON_MAX_SIZE
2652 #endif
2653 )) {
2654 return KERN_INVALID_ARGUMENT;
2655 }
2656
2657 start = *address;
2658
2659 if (anywhere) {
2660 vm_map_lock(map);
2661 map_locked = TRUE;
2662
2663 result = vm_map_locate_space(map, size, mask, vmk_flags,
2664 &start, &entry);
2665 if (result != KERN_SUCCESS) {
2666 goto BailOut;
2667 }
2668
2669 *address = start;
2670 end = start + size;
2671 assert(VM_MAP_PAGE_ALIGNED(*address,
2672 VM_MAP_PAGE_MASK(map)));
2673 } else {
2674 vm_map_offset_t effective_min_offset, effective_max_offset;
2675
2676 effective_min_offset = map->min_offset;
2677 effective_max_offset = map->max_offset;
2678
2679 if (vmk_flags.vmkf_beyond_max) {
2680 /*
2681 * Allow an insertion beyond the map's max offset.
2682 */
2683 effective_max_offset = 0x00000000FFFFF000ULL;
2684 if (vm_map_is_64bit(map)) {
2685 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2686 }
2687 #if XNU_TARGET_OS_OSX
2688 } else if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2689 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2690 #endif /* XNU_TARGET_OS_OSX */
2691 }
2692
2693 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2694 !overwrite &&
2695 user_alias == VM_MEMORY_REALLOC) {
2696 /*
2697 * Force realloc() to switch to a new allocation,
2698 * to prevent 4k-fragmented virtual ranges.
2699 */
2700 // DEBUG4K_ERROR("no realloc in place");
2701 return KERN_NO_SPACE;
2702 }
2703
2704 /*
2705 * Verify that:
2706 * the address doesn't itself violate
2707 * the mask requirement.
2708 */
2709
2710 vm_map_lock(map);
2711 map_locked = TRUE;
2712 if ((start & mask) != 0) {
2713 RETURN(KERN_NO_SPACE);
2714 }
2715
2716 #if CONFIG_MAP_RANGES
2717 if (map->uses_user_ranges) {
2718 struct mach_vm_range r;
2719
2720 vm_map_user_range_resolve(map, start, 1, &r);
2721 if (r.max_address == 0) {
2722 RETURN(KERN_INVALID_ADDRESS);
2723 }
2724 effective_min_offset = r.min_address;
2725 effective_max_offset = r.max_address;
2726 }
2727 #endif /* CONFIG_MAP_RANGES */
2728
2729 if ((startup_phase >= STARTUP_SUB_KMEM) && !is_submap &&
2730 (map == kernel_map)) {
2731 mach_vm_range_t r = kmem_validate_range_for_overwrite(start, size);
2732 effective_min_offset = r->min_address;
2733 effective_max_offset = r->max_address;
2734 }
2735
2736 /*
2737 * ... the address is within bounds
2738 */
2739
2740 end = start + size;
2741
2742 if ((start < effective_min_offset) ||
2743 (end > effective_max_offset) ||
2744 (start >= end)) {
2745 RETURN(KERN_INVALID_ADDRESS);
2746 }
2747
2748 if (overwrite) {
2749 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_MAP_ALIGN;
2750 kern_return_t remove_kr;
2751
2752 /*
2753 * Fixed mapping and "overwrite" flag: attempt to
2754 * remove all existing mappings in the specified
2755 * address range, saving them in our "zap_old_list".
2756 *
2757 * This avoids releasing the VM map lock in
2758 * vm_map_entry_delete() and allows atomicity
2759 * when we want to replace some mappings with a new one.
2760 * It also allows us to restore the old VM mappings if the
2761 * new mapping fails.
2762 */
2763 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
2764
2765 if (vmk_flags.vmkf_overwrite_immutable) {
2766 /* we can overwrite immutable mappings */
2767 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2768 }
2769 if (vmk_flags.vmkf_remap_prot_copy) {
2770 remove_flags |= VM_MAP_REMOVE_IMMUTABLE_CODE;
2771 }
2772 remove_kr = vm_map_delete(map, start, end, remove_flags,
2773 KMEM_GUARD_NONE, &zap_old_list).kmr_return;
2774 if (remove_kr) {
2775 /* XXX FBDP restore zap_old_list? */
2776 RETURN(remove_kr);
2777 }
2778 }
2779
2780 /*
2781 * ... the starting address isn't allocated
2782 */
2783
2784 if (vm_map_lookup_entry(map, start, &entry)) {
2785 if (!(vmk_flags.vmkf_already)) {
2786 RETURN(KERN_NO_SPACE);
2787 }
2788 /*
2789 * Check if what's already there is what we want.
2790 */
2791 tmp_start = start;
2792 tmp_offset = offset;
2793 if (entry->vme_start < start) {
2794 tmp_start -= start - entry->vme_start;
2795 tmp_offset -= start - entry->vme_start;
2796 }
2797 for (; entry->vme_start < end;
2798 entry = entry->vme_next) {
2799 /*
2800 * Check if the mapping's attributes
2801 * match the existing map entry.
2802 */
2803 if (entry == vm_map_to_entry(map) ||
2804 entry->vme_start != tmp_start ||
2805 entry->is_sub_map != is_submap ||
2806 VME_OFFSET(entry) != tmp_offset ||
2807 entry->needs_copy != needs_copy ||
2808 entry->protection != cur_protection ||
2809 entry->max_protection != max_protection ||
2810 entry->inheritance != inheritance ||
2811 entry->iokit_acct != iokit_acct ||
2812 VME_ALIAS(entry) != alias) {
2813 /* not the same mapping ! */
2814 RETURN(KERN_NO_SPACE);
2815 }
2816 /*
2817 * Check if the same object is being mapped.
2818 */
2819 if (is_submap) {
2820 if (VME_SUBMAP(entry) !=
2821 (vm_map_t) object) {
2822 /* not the same submap */
2823 RETURN(KERN_NO_SPACE);
2824 }
2825 } else {
2826 if (VME_OBJECT(entry) != object) {
2827 /* not the same VM object... */
2828 vm_object_t obj2;
2829
2830 obj2 = VME_OBJECT(entry);
2831 if ((obj2 == VM_OBJECT_NULL ||
2832 obj2->internal) &&
2833 (object == VM_OBJECT_NULL ||
2834 object->internal)) {
2835 /*
2836 * ... but both are
2837 * anonymous memory,
2838 * so equivalent.
2839 */
2840 } else {
2841 RETURN(KERN_NO_SPACE);
2842 }
2843 }
2844 }
2845
2846 tmp_offset += entry->vme_end - entry->vme_start;
2847 tmp_start += entry->vme_end - entry->vme_start;
2848 if (entry->vme_end >= end) {
2849 /* reached the end of our mapping */
2850 break;
2851 }
2852 }
2853 /* it all matches: let's use what's already there ! */
2854 RETURN(KERN_MEMORY_PRESENT);
2855 }
2856
2857 /*
2858 * ... the next region doesn't overlap the
2859 * end point.
2860 */
2861
2862 if ((entry->vme_next != vm_map_to_entry(map)) &&
2863 (entry->vme_next->vme_start < end)) {
2864 RETURN(KERN_NO_SPACE);
2865 }
2866 }
2867
2868 /*
2869 * At this point,
2870 * "start" and "end" should define the endpoints of the
2871 * available new range, and
2872 * "entry" should refer to the region before the new
2873 * range, and
2874 *
2875 * the map should be locked.
2876 */
2877
2878 /*
2879 * See whether we can avoid creating a new entry (and object) by
2880 * extending one of our neighbors. [So far, we only attempt to
2881 * extend from below.] Note that we can never extend/join
2882 * purgable objects because they need to remain distinct
2883 * entities in order to implement their "volatile object"
2884 * semantics.
2885 */
2886
2887 if (purgable ||
2888 entry_for_jit ||
2889 entry_for_tpro ||
2890 vm_memory_malloc_no_cow(user_alias)) {
2891 if (object == VM_OBJECT_NULL) {
2892 object = vm_object_allocate(size);
2893 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2894 object->true_share = FALSE;
2895 if (purgable) {
2896 task_t owner;
2897 object->purgable = VM_PURGABLE_NONVOLATILE;
2898 if (map->pmap == kernel_pmap) {
2899 /*
2900 * Purgeable mappings made in a kernel
2901 * map are "owned" by the kernel itself
2902 * rather than the current user task
2903 * because they're likely to be used by
2904 * more than this user task (see
2905 * execargs_purgeable_allocate(), for
2906 * example).
2907 */
2908 owner = kernel_task;
2909 } else {
2910 owner = current_task();
2911 }
2912 assert(object->vo_owner == NULL);
2913 assert(object->resident_page_count == 0);
2914 assert(object->wired_page_count == 0);
2915 vm_object_lock(object);
2916 vm_purgeable_nonvolatile_enqueue(object, owner);
2917 vm_object_unlock(object);
2918 }
2919 offset = (vm_object_offset_t)0;
2920 }
2921 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2922 /* no coalescing if address space uses sub-pages */
2923 } else if ((is_submap == FALSE) &&
2924 (object == VM_OBJECT_NULL) &&
2925 (entry != vm_map_to_entry(map)) &&
2926 (entry->vme_end == start) &&
2927 (!entry->is_shared) &&
2928 (!entry->is_sub_map) &&
2929 (!entry->in_transition) &&
2930 (!entry->needs_wakeup) &&
2931 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2932 (entry->protection == cur_protection) &&
2933 (entry->max_protection == max_protection) &&
2934 (entry->inheritance == inheritance) &&
2935 ((user_alias == VM_MEMORY_REALLOC) ||
2936 (VME_ALIAS(entry) == alias)) &&
2937 (entry->no_cache == no_cache) &&
2938 (entry->vme_permanent == permanent) &&
2939 /* no coalescing for immutable executable mappings */
2940 !((entry->protection & VM_PROT_EXECUTE) &&
2941 entry->vme_permanent) &&
2942 (!entry->superpage_size && !superpage_size) &&
2943 /*
2944 * No coalescing if not map-aligned, to avoid propagating
2945 * that condition any further than needed:
2946 */
2947 (!entry->map_aligned || !clear_map_aligned) &&
2948 (!entry->zero_wired_pages) &&
2949 (!entry->used_for_jit && !entry_for_jit) &&
2950 #if __arm64e__
2951 (!entry->used_for_tpro && !entry_for_tpro) &&
2952 #endif
2953 (!entry->csm_associated) &&
2954 (entry->iokit_acct == iokit_acct) &&
2955 (!entry->vme_resilient_codesign) &&
2956 (!entry->vme_resilient_media) &&
2957 (!entry->vme_atomic) &&
2958 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2959
2960 ((entry->vme_end - entry->vme_start) + size <=
2961 (user_alias == VM_MEMORY_REALLOC ?
2962 ANON_CHUNK_SIZE :
2963 NO_COALESCE_LIMIT)) &&
2964
2965 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2966 if (vm_object_coalesce(VME_OBJECT(entry),
2967 VM_OBJECT_NULL,
2968 VME_OFFSET(entry),
2969 (vm_object_offset_t) 0,
2970 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2971 (vm_map_size_t)(end - entry->vme_end))) {
2972 /*
2973 * Coalesced the two objects - can extend
2974 * the previous map entry to include the
2975 * new range.
2976 */
2977 map->size += (end - entry->vme_end);
2978 assert(entry->vme_start < end);
2979 assert(VM_MAP_PAGE_ALIGNED(end,
2980 VM_MAP_PAGE_MASK(map)));
2981 if (__improbable(vm_debug_events)) {
2982 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2983 }
2984 entry->vme_end = end;
2985 if (map->holelistenabled) {
2986 vm_map_store_update_first_free(map, entry, TRUE);
2987 } else {
2988 vm_map_store_update_first_free(map, map->first_free, TRUE);
2989 }
2990 new_mapping_established = TRUE;
2991 RETURN(KERN_SUCCESS);
2992 }
2993 }
2994
2995 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2996 new_entry = NULL;
2997
2998 if (vmk_flags.vmkf_submap_adjust) {
2999 vm_map_adjust_offsets((vm_map_t)caller_object, start, end);
3000 offset = start;
3001 }
3002
3003 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
3004 tmp2_end = tmp2_start + step;
3005 /*
3006 * Create a new entry
3007 *
3008 * XXX FBDP
3009 * The reserved "page zero" in each process's address space can
3010 * be arbitrarily large. Splitting it into separate objects and
3011 * therefore different VM map entries serves no purpose and just
3012 * slows down operations on the VM map, so let's not split the
3013 * allocation into chunks if the max protection is NONE. That
3014 * memory should never be accessible, so it will never get to the
3015 * default pager.
3016 */
3017 tmp_start = tmp2_start;
3018 if (!is_submap &&
3019 object == VM_OBJECT_NULL &&
3020 size > chunk_size &&
3021 max_protection != VM_PROT_NONE &&
3022 superpage_size == 0) {
3023 tmp_end = tmp_start + chunk_size;
3024 } else {
3025 tmp_end = tmp2_end;
3026 }
3027 do {
3028 if (!is_submap &&
3029 object != VM_OBJECT_NULL &&
3030 object->internal &&
3031 offset + (tmp_end - tmp_start) > object->vo_size) {
3032 // printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
3033 DTRACE_VM5(vm_map_enter_overmap,
3034 vm_map_t, map,
3035 vm_map_address_t, tmp_start,
3036 vm_map_address_t, tmp_end,
3037 vm_object_offset_t, offset,
3038 vm_object_size_t, object->vo_size);
3039 }
3040 new_entry = vm_map_entry_insert(map,
3041 entry, tmp_start, tmp_end,
3042 object, offset, vmk_flags,
3043 needs_copy,
3044 cur_protection, max_protection,
3045 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3046 VM_INHERIT_NONE : inheritance),
3047 clear_map_aligned);
3048
3049 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3050
3051 if (resilient_codesign) {
3052 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3053 if (!((cur_protection | max_protection) & reject_prot)) {
3054 new_entry->vme_resilient_codesign = TRUE;
3055 }
3056 }
3057
3058 if (resilient_media &&
3059 (object == VM_OBJECT_NULL ||
3060 object->internal)) {
3061 new_entry->vme_resilient_media = TRUE;
3062 }
3063
3064 assert(!new_entry->iokit_acct);
3065 if (!is_submap &&
3066 object != VM_OBJECT_NULL &&
3067 (object->purgable != VM_PURGABLE_DENY ||
3068 object->vo_ledger_tag)) {
3069 assert(new_entry->use_pmap);
3070 assert(!new_entry->iokit_acct);
3071 /*
3072 * Turn off pmap accounting since
3073 * purgeable (or tagged) objects have their
3074 * own ledgers.
3075 */
3076 new_entry->use_pmap = FALSE;
3077 } else if (!is_submap &&
3078 iokit_acct &&
3079 object != VM_OBJECT_NULL &&
3080 object->internal) {
3081 /* alternate accounting */
3082 assert(!new_entry->iokit_acct);
3083 assert(new_entry->use_pmap);
3084 new_entry->iokit_acct = TRUE;
3085 new_entry->use_pmap = FALSE;
3086 DTRACE_VM4(
3087 vm_map_iokit_mapped_region,
3088 vm_map_t, map,
3089 vm_map_offset_t, new_entry->vme_start,
3090 vm_map_offset_t, new_entry->vme_end,
3091 int, VME_ALIAS(new_entry));
3092 vm_map_iokit_mapped_region(
3093 map,
3094 (new_entry->vme_end -
3095 new_entry->vme_start));
3096 } else if (!is_submap) {
3097 assert(!new_entry->iokit_acct);
3098 assert(new_entry->use_pmap);
3099 }
3100
3101 if (is_submap) {
3102 vm_map_t submap;
3103 boolean_t submap_is_64bit;
3104 boolean_t use_pmap;
3105
3106 assert(new_entry->is_sub_map);
3107 assert(!new_entry->use_pmap);
3108 assert(!new_entry->iokit_acct);
3109 submap = (vm_map_t) object;
3110 submap_is_64bit = vm_map_is_64bit(submap);
3111 use_pmap = vmk_flags.vmkf_nested_pmap;
3112 #ifndef NO_NESTED_PMAP
3113 if (use_pmap && submap->pmap == NULL) {
3114 ledger_t ledger = map->pmap->ledger;
3115 /* we need a sub pmap to nest... */
3116 submap->pmap = pmap_create_options(ledger, 0,
3117 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3118 if (submap->pmap == NULL) {
3119 /* let's proceed without nesting... */
3120 }
3121 #if defined(__arm64__)
3122 else {
3123 pmap_set_nested(submap->pmap);
3124 }
3125 #endif
3126 }
3127 if (use_pmap && submap->pmap != NULL) {
3128 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3129 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3130 kr = KERN_FAILURE;
3131 } else {
3132 kr = pmap_nest(map->pmap,
3133 submap->pmap,
3134 tmp_start,
3135 tmp_end - tmp_start);
3136 }
3137 if (kr != KERN_SUCCESS) {
3138 printf("vm_map_enter: "
3139 "pmap_nest(0x%llx,0x%llx) "
3140 "error 0x%x\n",
3141 (long long)tmp_start,
3142 (long long)tmp_end,
3143 kr);
3144 } else {
3145 /* we're now nested ! */
3146 new_entry->use_pmap = TRUE;
3147 pmap_empty = FALSE;
3148 }
3149 }
3150 #endif /* NO_NESTED_PMAP */
3151 }
3152 entry = new_entry;
3153
3154 if (superpage_size) {
3155 vm_page_t pages, m;
3156 vm_object_t sp_object;
3157 vm_object_offset_t sp_offset;
3158
3159 VME_OFFSET_SET(entry, 0);
3160
3161 /* allocate one superpage */
3162 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3163 if (kr != KERN_SUCCESS) {
3164 /* deallocate whole range... */
3165 new_mapping_established = TRUE;
3166 /* ... but only up to "tmp_end" */
3167 size -= end - tmp_end;
3168 RETURN(kr);
3169 }
3170
3171 /* create one vm_object per superpage */
3172 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3173 sp_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3174 sp_object->phys_contiguous = TRUE;
3175 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3176 VME_OBJECT_SET(entry, sp_object, false, 0);
3177 assert(entry->use_pmap);
3178
3179 /* enter the base pages into the object */
3180 vm_object_lock(sp_object);
3181 for (sp_offset = 0;
3182 sp_offset < SUPERPAGE_SIZE;
3183 sp_offset += PAGE_SIZE) {
3184 m = pages;
3185 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3186 pages = NEXT_PAGE(m);
3187 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3188 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3189 }
3190 vm_object_unlock(sp_object);
3191 }
3192 } while (tmp_end != tmp2_end &&
3193 (tmp_start = tmp_end) &&
3194 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3195 tmp_end + chunk_size : tmp2_end));
3196 }
3197
3198 new_mapping_established = TRUE;
3199
3200 BailOut:
3201 assert(map_locked == TRUE);
3202
3203 /*
3204 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3205 * If we have identified and possibly established the new mapping(s),
3206 * make sure we did not go beyond the address space limit.
3207 */
3208 if (result == KERN_SUCCESS) {
3209 if (map->size_limit != RLIM_INFINITY &&
3210 map->size > map->size_limit) {
3211 /*
3212 * Establishing the requested mappings would exceed
3213 * the process's RLIMIT_AS limit: fail with
3214 * KERN_NO_SPACE.
3215 */
3216 result = KERN_NO_SPACE;
3217 printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3218 proc_selfpid(),
3219 (get_bsdtask_info(current_task())
3220 ? proc_name_address(get_bsdtask_info(current_task()))
3221 : "?"),
3222 __FUNCTION__,
3223 (uint64_t) map->size,
3224 (uint64_t) map->size_limit);
3225 DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3226 vm_map_size_t, map->size,
3227 uint64_t, map->size_limit);
3228 vm_map_enter_RLIMIT_AS_count++;
3229 } else if (map->data_limit != RLIM_INFINITY &&
3230 map->size > map->data_limit) {
3231 /*
3232 * Establishing the requested mappings would exceed
3233 * the process's RLIMIT_DATA limit: fail with
3234 * KERN_NO_SPACE.
3235 */
3236 result = KERN_NO_SPACE;
3237 printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3238 proc_selfpid(),
3239 (get_bsdtask_info(current_task())
3240 ? proc_name_address(get_bsdtask_info(current_task()))
3241 : "?"),
3242 __FUNCTION__,
3243 (uint64_t) map->size,
3244 (uint64_t) map->data_limit);
3245 DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3246 vm_map_size_t, map->size,
3247 uint64_t, map->data_limit);
3248 vm_map_enter_RLIMIT_DATA_count++;
3249 }
3250 }
3251
3252 if (result == KERN_SUCCESS) {
3253 vm_prot_t pager_prot;
3254 memory_object_t pager;
3255
3256 #if DEBUG
3257 if (pmap_empty &&
3258 !(vmk_flags.vmkf_no_pmap_check)) {
3259 assert(pmap_is_empty(map->pmap,
3260 *address,
3261 *address + size));
3262 }
3263 #endif /* DEBUG */
3264
3265 /*
3266 * For "named" VM objects, let the pager know that the
3267 * memory object is being mapped. Some pagers need to keep
3268 * track of this, to know when they can reclaim the memory
3269 * object, for example.
3270 * VM calls memory_object_map() for each mapping (specifying
3271 * the protection of each mapping) and calls
3272 * memory_object_last_unmap() when all the mappings are gone.
3273 */
3274 pager_prot = max_protection;
3275 if (needs_copy) {
3276 /*
3277 * Copy-On-Write mapping: won't modify
3278 * the memory object.
3279 */
3280 pager_prot &= ~VM_PROT_WRITE;
3281 }
3282 if (!is_submap &&
3283 object != VM_OBJECT_NULL &&
3284 object->named &&
3285 object->pager != MEMORY_OBJECT_NULL) {
3286 vm_object_lock(object);
3287 pager = object->pager;
3288 if (object->named &&
3289 pager != MEMORY_OBJECT_NULL) {
3290 assert(object->pager_ready);
3291 vm_object_mapping_wait(object, THREAD_UNINT);
3292 vm_object_mapping_begin(object);
3293 vm_object_unlock(object);
3294
3295 kr = memory_object_map(pager, pager_prot);
3296 assert(kr == KERN_SUCCESS);
3297
3298 vm_object_lock(object);
3299 vm_object_mapping_end(object);
3300 }
3301 vm_object_unlock(object);
3302 }
3303 }
3304
3305 assert(map_locked == TRUE);
3306
3307 if (new_mapping_established) {
3308 /*
3309 * If we release the map lock for any reason below,
3310 * another thread could deallocate our new mapping,
3311 * releasing the caller's reference on "caller_object",
3312 * which was transferred to the mapping.
3313 * If this was the only reference, the object could be
3314 * destroyed.
3315 *
3316 * We need to take an extra reference on "caller_object"
3317 * to keep it alive if we need to return the caller's
3318 * reference to the caller in case of failure.
3319 */
3320 if (is_submap) {
3321 vm_map_reference((vm_map_t)caller_object);
3322 } else {
3323 vm_object_reference(caller_object);
3324 }
3325 }
3326
3327 if (!keep_map_locked) {
3328 vm_map_unlock(map);
3329 map_locked = FALSE;
3330 entry = VM_MAP_ENTRY_NULL;
3331 new_entry = VM_MAP_ENTRY_NULL;
3332 }
3333
3334 /*
3335 * We can't hold the map lock if we enter this block.
3336 */
3337
3338 if (result == KERN_SUCCESS) {
3339 /* Wire down the new entry if the user
3340 * requested all new map entries be wired.
3341 */
3342 if ((map->wiring_required) || (superpage_size)) {
3343 assert(!keep_map_locked);
3344 pmap_empty = FALSE; /* pmap won't be empty */
3345 kr = vm_map_wire_kernel(map, start, end,
3346 cur_protection, VM_KERN_MEMORY_MLOCK,
3347 TRUE);
3348 result = kr;
3349 }
3350
3351 }
3352
3353 if (result != KERN_SUCCESS) {
3354 if (new_mapping_established) {
3355 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_FLAGS;
3356
3357 /*
3358 * We have to get rid of the new mappings since we
3359 * won't make them available to the user.
3360 * Try to do that atomically, to minimize the risk
3361 * that someone else creates new mappings in that range.
3362 */
3363 if (!map_locked) {
3364 vm_map_lock(map);
3365 map_locked = TRUE;
3366 }
3367 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
3368 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
3369 if (permanent) {
3370 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
3371 }
3372 (void) vm_map_delete(map,
3373 *address, *address + size,
3374 remove_flags,
3375 KMEM_GUARD_NONE, &zap_new_list);
3376 }
3377
3378 if (vm_map_zap_first_entry(&zap_old_list)) {
3379 vm_map_entry_t entry1, entry2;
3380
3381 /*
3382 * The new mapping failed. Attempt to restore
3383 * the old mappings, saved in the "zap_old_map".
3384 */
3385 if (!map_locked) {
3386 vm_map_lock(map);
3387 map_locked = TRUE;
3388 }
3389
3390 /* first check if the coast is still clear */
3391 start = vm_map_zap_first_entry(&zap_old_list)->vme_start;
3392 end = vm_map_zap_last_entry(&zap_old_list)->vme_end;
3393
3394 if (vm_map_lookup_entry(map, start, &entry1) ||
3395 vm_map_lookup_entry(map, end, &entry2) ||
3396 entry1 != entry2) {
3397 /*
3398 * Part of that range has already been
3399 * re-mapped: we can't restore the old
3400 * mappings...
3401 */
3402 vm_map_enter_restore_failures++;
3403 } else {
3404 /*
3405 * Transfer the saved map entries from
3406 * "zap_old_map" to the original "map",
3407 * inserting them all after "entry1".
3408 */
3409 while ((entry2 = vm_map_zap_pop(&zap_old_list))) {
3410 vm_map_size_t entry_size;
3411
3412 entry_size = (entry2->vme_end -
3413 entry2->vme_start);
3414 vm_map_store_entry_link(map, entry1, entry2,
3415 VM_MAP_KERNEL_FLAGS_NONE);
3416 map->size += entry_size;
3417 entry1 = entry2;
3418 }
3419 if (map->wiring_required) {
3420 /*
3421 * XXX TODO: we should rewire the
3422 * old pages here...
3423 */
3424 }
3425 vm_map_enter_restore_successes++;
3426 }
3427 }
3428 }
3429
3430 /*
3431 * The caller is responsible for releasing the lock if it requested to
3432 * keep the map locked.
3433 */
3434 if (map_locked && !keep_map_locked) {
3435 vm_map_unlock(map);
3436 }
3437
3438 vm_map_zap_dispose(&zap_old_list);
3439 vm_map_zap_dispose(&zap_new_list);
3440
3441 if (new_mapping_established) {
3442 /*
3443 * The caller had a reference on "caller_object" and we
3444 * transferred that reference to the mapping.
3445 * We also took an extra reference on "caller_object" to keep
3446 * it alive while the map was unlocked.
3447 */
3448 if (result == KERN_SUCCESS) {
3449 /*
3450 * On success, the caller's reference on the object gets
3451 * transferred to the mapping.
3452 * Release our extra reference.
3453 */
3454 if (is_submap) {
3455 vm_map_deallocate((vm_map_t)caller_object);
3456 } else {
3457 vm_object_deallocate(caller_object);
3458 }
3459 } else {
3460 /*
3461 * On error, the caller expects to still have a
3462 * reference on the object it gave us.
3463 * Let's use our extra reference for that.
3464 */
3465 }
3466 }
3467
3468 return result;
3469
3470 #undef RETURN
3471 }
3472
3473 #if __arm64__
3474 extern const struct memory_object_pager_ops fourk_pager_ops;
3475 kern_return_t
3476 vm_map_enter_fourk(
3477 vm_map_t map,
3478 vm_map_offset_t *address, /* IN/OUT */
3479 vm_map_size_t size,
3480 vm_map_offset_t mask,
3481 vm_map_kernel_flags_t vmk_flags,
3482 vm_object_t object,
3483 vm_object_offset_t offset,
3484 boolean_t needs_copy,
3485 vm_prot_t cur_protection,
3486 vm_prot_t max_protection,
3487 vm_inherit_t inheritance)
3488 {
3489 vm_map_entry_t entry, new_entry;
3490 vm_map_offset_t start, fourk_start;
3491 vm_map_offset_t end, fourk_end;
3492 vm_map_size_t fourk_size;
3493 kern_return_t result = KERN_SUCCESS;
3494 boolean_t map_locked = FALSE;
3495 boolean_t pmap_empty = TRUE;
3496 boolean_t new_mapping_established = FALSE;
3497 const bool keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3498 const bool anywhere = !vmk_flags.vmf_fixed;
3499 const bool purgable = vmk_flags.vmf_purgeable;
3500 const bool overwrite = vmk_flags.vmf_overwrite;
3501 const bool is_submap = vmk_flags.vmkf_submap;
3502 const bool entry_for_jit = vmk_flags.vmkf_map_jit;
3503 const unsigned int superpage_size = vmk_flags.vmf_superpage_size;
3504 vm_map_offset_t effective_min_offset, effective_max_offset;
3505 kern_return_t kr;
3506 boolean_t clear_map_aligned = FALSE;
3507 memory_object_t fourk_mem_obj;
3508 vm_object_t fourk_object;
3509 vm_map_offset_t fourk_pager_offset;
3510 int fourk_pager_index_start, fourk_pager_index_num;
3511 int cur_idx;
3512 boolean_t fourk_copy;
3513 vm_object_t copy_object;
3514 vm_object_offset_t copy_offset;
3515 VM_MAP_ZAP_DECLARE(zap_list);
3516
3517 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3518 panic("%s:%d", __FUNCTION__, __LINE__);
3519 }
3520 fourk_mem_obj = MEMORY_OBJECT_NULL;
3521 fourk_object = VM_OBJECT_NULL;
3522
3523 if (superpage_size) {
3524 return KERN_NOT_SUPPORTED;
3525 }
3526
3527 if ((cur_protection & VM_PROT_WRITE) &&
3528 (cur_protection & VM_PROT_EXECUTE) &&
3529 #if XNU_TARGET_OS_OSX
3530 map->pmap != kernel_pmap &&
3531 (vm_map_cs_enforcement(map)
3532 #if __arm64__
3533 || !VM_MAP_IS_EXOTIC(map)
3534 #endif /* __arm64__ */
3535 ) &&
3536 #endif /* XNU_TARGET_OS_OSX */
3537 #if CODE_SIGNING_MONITOR
3538 (csm_address_space_exempt(map->pmap) != KERN_SUCCESS) &&
3539 #endif
3540 !entry_for_jit) {
3541 DTRACE_VM3(cs_wx,
3542 uint64_t, 0,
3543 uint64_t, 0,
3544 vm_prot_t, cur_protection);
3545 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3546 "turning off execute\n",
3547 proc_selfpid(),
3548 (get_bsdtask_info(current_task())
3549 ? proc_name_address(get_bsdtask_info(current_task()))
3550 : "?"),
3551 __FUNCTION__);
3552 cur_protection &= ~VM_PROT_EXECUTE;
3553 }
3554
3555 /*
3556 * If the task has requested executable lockdown,
3557 * deny any new executable mapping.
3558 */
3559 if (map->map_disallow_new_exec == TRUE) {
3560 if (cur_protection & VM_PROT_EXECUTE) {
3561 return KERN_PROTECTION_FAILURE;
3562 }
3563 }
3564
3565 if (is_submap) {
3566 return KERN_NOT_SUPPORTED;
3567 }
3568 if (vmk_flags.vmkf_already) {
3569 return KERN_NOT_SUPPORTED;
3570 }
3571 if (purgable || entry_for_jit) {
3572 return KERN_NOT_SUPPORTED;
3573 }
3574
3575 effective_min_offset = map->min_offset;
3576
3577 if (vmk_flags.vmkf_beyond_max) {
3578 return KERN_NOT_SUPPORTED;
3579 } else {
3580 effective_max_offset = map->max_offset;
3581 }
3582
3583 if (size == 0 ||
3584 (offset & FOURK_PAGE_MASK) != 0) {
3585 *address = 0;
3586 return KERN_INVALID_ARGUMENT;
3587 }
3588
3589 #define RETURN(value) { result = value; goto BailOut; }
3590
3591 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3592 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3593
3594 if (!anywhere && overwrite) {
3595 return KERN_NOT_SUPPORTED;
3596 }
3597
3598 fourk_start = *address;
3599 fourk_size = size;
3600 fourk_end = fourk_start + fourk_size;
3601
3602 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3603 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3604 size = end - start;
3605
3606 if (anywhere) {
3607 return KERN_NOT_SUPPORTED;
3608 } else {
3609 /*
3610 * Verify that:
3611 * the address doesn't itself violate
3612 * the mask requirement.
3613 */
3614
3615 vm_map_lock(map);
3616 map_locked = TRUE;
3617 if ((start & mask) != 0) {
3618 RETURN(KERN_NO_SPACE);
3619 }
3620
3621 /*
3622 * ... the address is within bounds
3623 */
3624
3625 end = start + size;
3626
3627 if ((start < effective_min_offset) ||
3628 (end > effective_max_offset) ||
3629 (start >= end)) {
3630 RETURN(KERN_INVALID_ADDRESS);
3631 }
3632
3633 /*
3634 * ... the starting address isn't allocated
3635 */
3636 if (vm_map_lookup_entry(map, start, &entry)) {
3637 vm_object_t cur_object, shadow_object;
3638
3639 /*
3640 * We might already have some 4K mappings
3641 * in a 16K page here.
3642 */
3643
3644 if (entry->vme_end - entry->vme_start
3645 != SIXTEENK_PAGE_SIZE) {
3646 RETURN(KERN_NO_SPACE);
3647 }
3648 if (entry->is_sub_map) {
3649 RETURN(KERN_NO_SPACE);
3650 }
3651 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3652 RETURN(KERN_NO_SPACE);
3653 }
3654
3655 /* go all the way down the shadow chain */
3656 cur_object = VME_OBJECT(entry);
3657 vm_object_lock(cur_object);
3658 while (cur_object->shadow != VM_OBJECT_NULL) {
3659 shadow_object = cur_object->shadow;
3660 vm_object_lock(shadow_object);
3661 vm_object_unlock(cur_object);
3662 cur_object = shadow_object;
3663 shadow_object = VM_OBJECT_NULL;
3664 }
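/*
 * Illustrative note (sketch): for a shadow chain obj0 -> obj1 -> obj2,
 * the loop above leaves "cur_object" pointing at (and locked on) the
 * terminal object obj2; only that bottom object can be the one backed
 * by the "4K" pager that the checks below look for.
 */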
3665 if (cur_object->internal ||
3666 cur_object->pager == NULL) {
3667 vm_object_unlock(cur_object);
3668 RETURN(KERN_NO_SPACE);
3669 }
3670 if (cur_object->pager->mo_pager_ops
3671 != &fourk_pager_ops) {
3672 vm_object_unlock(cur_object);
3673 RETURN(KERN_NO_SPACE);
3674 }
3675 fourk_object = cur_object;
3676 fourk_mem_obj = fourk_object->pager;
3677
3678 /* keep the "4K" object alive */
3679 vm_object_reference_locked(fourk_object);
3680 memory_object_reference(fourk_mem_obj);
3681 vm_object_unlock(fourk_object);
3682
3683 /* merge permissions */
3684 entry->protection |= cur_protection;
3685 entry->max_protection |= max_protection;
3686
3687 if ((entry->protection & VM_PROT_WRITE) &&
3688 (entry->protection & VM_PROT_ALLEXEC) &&
3689 fourk_binary_compatibility_unsafe &&
3690 fourk_binary_compatibility_allow_wx) {
3691 /* write+execute: need to be "jit" */
3692 entry->used_for_jit = TRUE;
3693 }
3694 goto map_in_fourk_pager;
3695 }
3696
3697 /*
3698 * ... the next region doesn't overlap the
3699 * end point.
3700 */
3701
3702 if ((entry->vme_next != vm_map_to_entry(map)) &&
3703 (entry->vme_next->vme_start < end)) {
3704 RETURN(KERN_NO_SPACE);
3705 }
3706 }
3707
3708 /*
3709 * At this point,
3710 * "start" and "end" should define the endpoints of the
3711 * available new range, and
3712 * "entry" should refer to the region before the new
3713 * range, and
3714 *
3715 * the map should be locked.
3716 */
3717
3718 /* create a new "4K" pager */
3719 fourk_mem_obj = fourk_pager_create();
3720 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3721 assert(fourk_object);
3722
3723 /* keep the "4K" object alive */
3724 vm_object_reference(fourk_object);
3725
3726 /* create a "copy" object, to map the "4K" object copy-on-write */
3727 fourk_copy = TRUE;
3728 result = vm_object_copy_strategically(fourk_object,
3729 0,
3730 end - start,
3731 &copy_object,
3732 &copy_offset,
3733 &fourk_copy);
3734 assert(result == KERN_SUCCESS);
3735 assert(copy_object != VM_OBJECT_NULL);
3736 assert(copy_offset == 0);
3737
3738 /* map the "4K" pager's copy object */
3739 new_entry = vm_map_entry_insert(map,
3740 entry,
3741 vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
3742 vm_map_round_page(end, VM_MAP_PAGE_MASK(map)),
3743 copy_object,
3744 0, /* offset */
3745 vmk_flags,
3746 FALSE, /* needs_copy */
3747 cur_protection, max_protection,
3748 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3749 VM_INHERIT_NONE : inheritance),
3750 clear_map_aligned);
3751 entry = new_entry;
3752
3753 #if VM_MAP_DEBUG_FOURK
3754 if (vm_map_debug_fourk) {
3755 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3756 map,
3757 (uint64_t) entry->vme_start,
3758 (uint64_t) entry->vme_end,
3759 fourk_mem_obj);
3760 }
3761 #endif /* VM_MAP_DEBUG_FOURK */
3762
3763 new_mapping_established = TRUE;
3764
3765 map_in_fourk_pager:
3766 /* "map" the original "object" where it belongs in the "4K" pager */
3767 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3768 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3769 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3770 fourk_pager_index_num = 4;
3771 } else {
3772 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3773 }
3774 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3775 fourk_pager_index_num = 4 - fourk_pager_index_start;
3776 }
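/*
 * Worked example (illustrative values): fourk_start == 0x6000 and
 * fourk_size == 0x2000 give fourk_pager_offset == 0x2000, so
 * fourk_pager_index_start == 2 and fourk_pager_index_num == 2;
 * the loop below populates 4K slots 2 and 3 of the 16K page.
 */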
3777 for (cur_idx = 0;
3778 cur_idx < fourk_pager_index_num;
3779 cur_idx++) {
3780 vm_object_t old_object;
3781 vm_object_offset_t old_offset;
3782
3783 kr = fourk_pager_populate(fourk_mem_obj,
3784 TRUE, /* overwrite */
3785 fourk_pager_index_start + cur_idx,
3786 object,
3787 (object
3788 ? (offset +
3789 (cur_idx * FOURK_PAGE_SIZE))
3790 : 0),
3791 &old_object,
3792 &old_offset);
3793 #if VM_MAP_DEBUG_FOURK
3794 if (vm_map_debug_fourk) {
3795 if (old_object == (vm_object_t) -1 &&
3796 old_offset == (vm_object_offset_t) -1) {
3797 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3798 "pager [%p:0x%llx] "
3799 "populate[%d] "
3800 "[object:%p,offset:0x%llx]\n",
3801 map,
3802 (uint64_t) entry->vme_start,
3803 (uint64_t) entry->vme_end,
3804 fourk_mem_obj,
3805 VME_OFFSET(entry),
3806 fourk_pager_index_start + cur_idx,
3807 object,
3808 (object
3809 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3810 : 0));
3811 } else {
3812 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3813 "pager [%p:0x%llx] "
3814 "populate[%d] [object:%p,offset:0x%llx] "
3815 "old [%p:0x%llx]\n",
3816 map,
3817 (uint64_t) entry->vme_start,
3818 (uint64_t) entry->vme_end,
3819 fourk_mem_obj,
3820 VME_OFFSET(entry),
3821 fourk_pager_index_start + cur_idx,
3822 object,
3823 (object
3824 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3825 : 0),
3826 old_object,
3827 old_offset);
3828 }
3829 }
3830 #endif /* VM_MAP_DEBUG_FOURK */
3831
3832 assert(kr == KERN_SUCCESS);
3833 if (object != old_object &&
3834 object != VM_OBJECT_NULL &&
3835 object != (vm_object_t) -1) {
3836 vm_object_reference(object);
3837 }
3838 if (object != old_object &&
3839 old_object != VM_OBJECT_NULL &&
3840 old_object != (vm_object_t) -1) {
3841 vm_object_deallocate(old_object);
3842 }
3843 }
3844
3845 BailOut:
3846 assert(map_locked == TRUE);
3847
3848 if (result == KERN_SUCCESS) {
3849 vm_prot_t pager_prot;
3850 memory_object_t pager;
3851
3852 #if DEBUG
3853 if (pmap_empty &&
3854 !(vmk_flags.vmkf_no_pmap_check)) {
3855 assert(pmap_is_empty(map->pmap,
3856 *address,
3857 *address + size));
3858 }
3859 #endif /* DEBUG */
3860
3861 /*
3862 * For "named" VM objects, let the pager know that the
3863 * memory object is being mapped. Some pagers need to keep
3864 * track of this, to know when they can reclaim the memory
3865 * object, for example.
3866 * VM calls memory_object_map() for each mapping (specifying
3867 * the protection of each mapping) and calls
3868 * memory_object_last_unmap() when all the mappings are gone.
3869 */
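/*
 * Illustrative sequence (a sketch, not a verbatim trace; the protection
 * arguments are placeholders): a named object that gets mapped twice and
 * later fully unmapped would see
 *	memory_object_map(pager, prot_of_first_mapping);
 *	memory_object_map(pager, prot_of_second_mapping);
 *	memory_object_last_unmap(pager);
 * so the pager can defer reclaiming the memory object until the last
 * unmap notification arrives.
 */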
3870 pager_prot = max_protection;
3871 if (needs_copy) {
3872 /*
3873 * Copy-On-Write mapping: won't modify
3874 * the memory object.
3875 */
3876 pager_prot &= ~VM_PROT_WRITE;
3877 }
3878 if (!is_submap &&
3879 object != VM_OBJECT_NULL &&
3880 object->named &&
3881 object->pager != MEMORY_OBJECT_NULL) {
3882 vm_object_lock(object);
3883 pager = object->pager;
3884 if (object->named &&
3885 pager != MEMORY_OBJECT_NULL) {
3886 assert(object->pager_ready);
3887 vm_object_mapping_wait(object, THREAD_UNINT);
3888 vm_object_mapping_begin(object);
3889 vm_object_unlock(object);
3890
3891 kr = memory_object_map(pager, pager_prot);
3892 assert(kr == KERN_SUCCESS);
3893
3894 vm_object_lock(object);
3895 vm_object_mapping_end(object);
3896 }
3897 vm_object_unlock(object);
3898 }
3899 if (!is_submap &&
3900 fourk_object != VM_OBJECT_NULL &&
3901 fourk_object->named &&
3902 fourk_object->pager != MEMORY_OBJECT_NULL) {
3903 vm_object_lock(fourk_object);
3904 pager = fourk_object->pager;
3905 if (fourk_object->named &&
3906 pager != MEMORY_OBJECT_NULL) {
3907 assert(fourk_object->pager_ready);
3908 vm_object_mapping_wait(fourk_object,
3909 THREAD_UNINT);
3910 vm_object_mapping_begin(fourk_object);
3911 vm_object_unlock(fourk_object);
3912
3913 kr = memory_object_map(pager, VM_PROT_READ);
3914 assert(kr == KERN_SUCCESS);
3915
3916 vm_object_lock(fourk_object);
3917 vm_object_mapping_end(fourk_object);
3918 }
3919 vm_object_unlock(fourk_object);
3920 }
3921 }
3922
3923 if (fourk_object != VM_OBJECT_NULL) {
3924 vm_object_deallocate(fourk_object);
3925 fourk_object = VM_OBJECT_NULL;
3926 memory_object_deallocate(fourk_mem_obj);
3927 fourk_mem_obj = MEMORY_OBJECT_NULL;
3928 }
3929
3930 assert(map_locked == TRUE);
3931
3932 if (!keep_map_locked) {
3933 vm_map_unlock(map);
3934 map_locked = FALSE;
3935 }
3936
3937 /*
3938 * We can't hold the map lock if we enter this block.
3939 */
3940
3941 if (result == KERN_SUCCESS) {
3942 /* Wire down the new entry if the user
3943 * requested all new map entries be wired.
3944 */
3945 if ((map->wiring_required) || (superpage_size)) {
3946 assert(!keep_map_locked);
3947 pmap_empty = FALSE; /* pmap won't be empty */
3948 kr = vm_map_wire_kernel(map, start, end,
3949 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3950 TRUE);
3951 result = kr;
3952 }
3953
3954 }
3955
3956 if (result != KERN_SUCCESS) {
3957 if (new_mapping_established) {
3958 /*
3959 * We have to get rid of the new mappings since we
3960 * won't make them available to the user.
3961 * Try to do that atomically, to minimize the risk
3962 * that someone else creates new mappings in that range.
3963 */
3964
3965 if (!map_locked) {
3966 vm_map_lock(map);
3967 map_locked = TRUE;
3968 }
3969 (void)vm_map_delete(map, *address, *address + size,
3970 VM_MAP_REMOVE_NO_MAP_ALIGN | VM_MAP_REMOVE_NO_YIELD,
3971 KMEM_GUARD_NONE, &zap_list);
3972 }
3973 }
3974
3975 /*
3976 * The caller is responsible for releasing the lock if it requested to
3977 * keep the map locked.
3978 */
3979 if (map_locked && !keep_map_locked) {
3980 vm_map_unlock(map);
3981 }
3982
3983 vm_map_zap_dispose(&zap_list);
3984
3985 return result;
3986
3987 #undef RETURN
3988 }
3989 #endif /* __arm64__ */
3990
3991 /*
3992 * Counters for the prefault optimization.
3993 */
3994 int64_t vm_prefault_nb_pages = 0;
3995 int64_t vm_prefault_nb_bailout = 0;
3996
3997 static kern_return_t
3998 vm_map_enter_mem_object_helper(
3999 vm_map_t target_map,
4000 vm_map_offset_t *address,
4001 vm_map_size_t initial_size,
4002 vm_map_offset_t mask,
4003 vm_map_kernel_flags_t vmk_flags,
4004 ipc_port_t port,
4005 vm_object_offset_t offset,
4006 boolean_t copy,
4007 vm_prot_t cur_protection,
4008 vm_prot_t max_protection,
4009 vm_inherit_t inheritance,
4010 upl_page_list_ptr_t page_list,
4011 unsigned int page_list_count)
4012 {
4013 vm_map_address_t map_addr;
4014 vm_map_size_t map_size;
4015 vm_object_t object;
4016 vm_object_size_t size;
4017 kern_return_t result;
4018 boolean_t mask_cur_protection, mask_max_protection;
4019 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4020 vm_map_offset_t offset_in_mapping = 0;
4021 #if __arm64__
4022 boolean_t fourk = vmk_flags.vmkf_fourk;
4023 #endif /* __arm64__ */
4024
4025 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4026 /* XXX TODO4K prefaulting depends on page size... */
4027 try_prefault = FALSE;
4028 }
4029
4030 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4031 vm_map_kernel_flags_update_range_id(&vmk_flags, target_map);
4032
4033 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4034 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4035 cur_protection &= ~VM_PROT_IS_MASK;
4036 max_protection &= ~VM_PROT_IS_MASK;
4037
4038 /*
4039 * Check arguments for validity
4040 */
4041 if ((target_map == VM_MAP_NULL) ||
4042 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4043 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4044 (inheritance > VM_INHERIT_LAST_VALID) ||
4045 (try_prefault && (copy || !page_list)) ||
4046 initial_size == 0) {
4047 return KERN_INVALID_ARGUMENT;
4048 }
4049
4050 #if __arm64__
4051 if (cur_protection & VM_PROT_EXECUTE) {
4052 cur_protection |= VM_PROT_READ;
4053 }
4054
4055 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4056 /* no "fourk" if map is using a sub-page page size */
4057 fourk = FALSE;
4058 }
4059 if (fourk) {
4060 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4061 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4062 } else
4063 #endif /* __arm64__ */
4064 {
4065 map_addr = vm_map_trunc_page(*address,
4066 VM_MAP_PAGE_MASK(target_map));
4067 map_size = vm_map_round_page(initial_size,
4068 VM_MAP_PAGE_MASK(target_map));
4069 }
4070 if (map_size == 0) {
4071 return KERN_INVALID_ARGUMENT;
4072 }
4073 size = vm_object_round_page(initial_size);
4074
4075 /*
4076 * Find the vm object (if any) corresponding to this port.
4077 */
4078 if (!IP_VALID(port)) {
4079 object = VM_OBJECT_NULL;
4080 offset = 0;
4081 copy = FALSE;
4082 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4083 vm_named_entry_t named_entry;
4084 vm_object_offset_t data_offset;
4085
4086 named_entry = mach_memory_entry_from_port(port);
4087
4088 if (vmk_flags.vmf_return_data_addr ||
4089 vmk_flags.vmf_return_4k_data_addr) {
4090 data_offset = named_entry->data_offset;
4091 offset += named_entry->data_offset;
4092 } else {
4093 data_offset = 0;
4094 }
4095
4096 /* a few checks to make sure user is obeying rules */
4097 if (mask_max_protection) {
4098 max_protection &= named_entry->protection;
4099 }
4100 if (mask_cur_protection) {
4101 cur_protection &= named_entry->protection;
4102 }
4103 if ((named_entry->protection & max_protection) !=
4104 max_protection) {
4105 return KERN_INVALID_RIGHT;
4106 }
4107 if ((named_entry->protection & cur_protection) !=
4108 cur_protection) {
4109 return KERN_INVALID_RIGHT;
4110 }
4111 if (offset + size <= offset) {
4112 /* overflow */
4113 return KERN_INVALID_ARGUMENT;
4114 }
4115 if (named_entry->size < (offset + initial_size)) {
4116 return KERN_INVALID_ARGUMENT;
4117 }
4118
4119 if (named_entry->is_copy) {
4120 /* for a vm_map_copy, we can only map it whole */
4121 if ((size != named_entry->size) &&
4122 (vm_map_round_page(size,
4123 VM_MAP_PAGE_MASK(target_map)) ==
4124 named_entry->size)) {
4125 /* XXX FBDP use the rounded size... */
4126 size = vm_map_round_page(
4127 size,
4128 VM_MAP_PAGE_MASK(target_map));
4129 }
4130 }
4131
4132 /* the caller's parameter offset is defined to be the */
4133 /* offset from beginning of named entry offset in object */
4134 offset = offset + named_entry->offset;
4135
4136 if (!VM_MAP_PAGE_ALIGNED(size,
4137 VM_MAP_PAGE_MASK(target_map))) {
4138 /*
4139 * Let's not map more than requested;
4140 * vm_map_enter() will handle this "not map-aligned"
4141 * case.
4142 */
4143 map_size = size;
4144 }
4145
4146 named_entry_lock(named_entry);
4147 if (named_entry->is_sub_map) {
4148 vm_map_t submap;
4149
4150 if (vmk_flags.vmf_return_data_addr ||
4151 vmk_flags.vmf_return_4k_data_addr) {
4152 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4153 }
4154
4155 submap = named_entry->backing.map;
4156 vm_map_reference(submap);
4157 named_entry_unlock(named_entry);
4158
4159 vmk_flags.vmkf_submap = TRUE;
4160
4161 result = vm_map_enter(target_map,
4162 &map_addr,
4163 map_size,
4164 mask,
4165 vmk_flags,
4166 (vm_object_t)(uintptr_t) submap,
4167 offset,
4168 copy,
4169 cur_protection,
4170 max_protection,
4171 inheritance);
4172 if (result != KERN_SUCCESS) {
4173 vm_map_deallocate(submap);
4174 } else {
4175 /*
4176 * No need to lock "submap" just to check its
4177 * "mapped" flag: that flag is never reset
4178 * once it's been set and if we race, we'll
4179 * just end up setting it twice, which is OK.
4180 */
4181 if (submap->mapped_in_other_pmaps == FALSE &&
4182 vm_map_pmap(submap) != PMAP_NULL &&
4183 vm_map_pmap(submap) !=
4184 vm_map_pmap(target_map)) {
4185 /*
4186 * This submap is being mapped in a map
4187 * that uses a different pmap.
4188 * Set its "mapped_in_other_pmaps" flag
4189 * to indicate that we now need to
4190 * remove mappings from all pmaps rather
4191 * than just the submap's pmap.
4192 */
4193 vm_map_lock(submap);
4194 submap->mapped_in_other_pmaps = TRUE;
4195 vm_map_unlock(submap);
4196 }
4197 *address = map_addr;
4198 }
4199 return result;
4200 } else if (named_entry->is_copy) {
4201 kern_return_t kr;
4202 vm_map_copy_t copy_map;
4203 vm_map_entry_t copy_entry;
4204 vm_map_offset_t copy_addr;
4205 vm_map_copy_t target_copy_map;
4206 vm_map_offset_t overmap_start, overmap_end;
4207 vm_map_offset_t trimmed_start;
4208 vm_map_size_t target_size;
4209
4210 if (!vm_map_kernel_flags_check_vmflags(vmk_flags,
4211 (VM_FLAGS_FIXED |
4212 VM_FLAGS_ANYWHERE |
4213 VM_FLAGS_OVERWRITE |
4214 VM_FLAGS_RETURN_4K_DATA_ADDR |
4215 VM_FLAGS_RETURN_DATA_ADDR))) {
4216 named_entry_unlock(named_entry);
4217 return KERN_INVALID_ARGUMENT;
4218 }
4219
4220 copy_map = named_entry->backing.copy;
4221 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4222 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4223 /* unsupported type; should not happen */
4224 printf("vm_map_enter_mem_object: "
4225 "memory_entry->backing.copy "
4226 "unsupported type 0x%x\n",
4227 copy_map->type);
4228 named_entry_unlock(named_entry);
4229 return KERN_INVALID_ARGUMENT;
4230 }
4231
4232 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4233 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4234 }
4235
4236 if (vmk_flags.vmf_return_data_addr ||
4237 vmk_flags.vmf_return_4k_data_addr) {
4238 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4239 if (vmk_flags.vmf_return_4k_data_addr) {
4240 offset_in_mapping &= ~((signed)(0xFFF));
4241 }
4242 }
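/*
 * Worked example (illustrative values): with a 16K target map and
 * offset == 0x5234, offset_in_mapping == 0x1234; if only the 4K data
 * address was requested, it is further truncated to 0x1000.
 */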
4243
4244 target_copy_map = VM_MAP_COPY_NULL;
4245 target_size = copy_map->size;
4246 overmap_start = 0;
4247 overmap_end = 0;
4248 trimmed_start = 0;
4249 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4250 DEBUG4K_ADJUST("adjusting...\n");
4251 kr = vm_map_copy_adjust_to_target(
4252 copy_map,
4253 offset /* includes data_offset */,
4254 initial_size,
4255 target_map,
4256 copy,
4257 &target_copy_map,
4258 &overmap_start,
4259 &overmap_end,
4260 &trimmed_start);
4261 if (kr != KERN_SUCCESS) {
4262 named_entry_unlock(named_entry);
4263 return kr;
4264 }
4265 target_size = target_copy_map->size;
4266 if (trimmed_start >= data_offset) {
4267 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4268 } else {
4269 data_offset -= trimmed_start;
4270 }
4271 } else {
4272 /*
4273 * Assert that the vm_map_copy is coming from the right
4274 * zone and hasn't been forged
4275 */
4276 vm_map_copy_require(copy_map);
4277 target_copy_map = copy_map;
4278 }
4279
4280 vm_map_kernel_flags_t rsv_flags = vmk_flags;
4281
4282 vm_map_kernel_flags_and_vmflags(&rsv_flags,
4283 (VM_FLAGS_FIXED |
4284 VM_FLAGS_ANYWHERE |
4285 VM_FLAGS_OVERWRITE |
4286 VM_FLAGS_RETURN_4K_DATA_ADDR |
4287 VM_FLAGS_RETURN_DATA_ADDR));
4288
4289 /* reserve a contiguous range */
4290 kr = vm_map_enter(target_map,
4291 &map_addr,
4292 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4293 mask,
4294 rsv_flags,
4295 VM_OBJECT_NULL,
4296 0,
4297 FALSE, /* copy */
4298 cur_protection,
4299 max_protection,
4300 inheritance);
4301 if (kr != KERN_SUCCESS) {
4302 DEBUG4K_ERROR("kr 0x%x\n", kr);
4303 if (target_copy_map != copy_map) {
4304 vm_map_copy_discard(target_copy_map);
4305 target_copy_map = VM_MAP_COPY_NULL;
4306 }
4307 named_entry_unlock(named_entry);
4308 return kr;
4309 }
4310
4311 copy_addr = map_addr;
4312
4313 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4314 copy_entry != vm_map_copy_to_entry(target_copy_map);
4315 copy_entry = copy_entry->vme_next) {
4316 vm_map_t copy_submap = VM_MAP_NULL;
4317 vm_object_t copy_object = VM_OBJECT_NULL;
4318 vm_map_size_t copy_size;
4319 vm_object_offset_t copy_offset;
4320 boolean_t do_copy = false;
4321
4322 if (copy_entry->is_sub_map) {
4323 copy_submap = VME_SUBMAP(copy_entry);
4324 copy_object = (vm_object_t)copy_submap;
4325 } else {
4326 copy_object = VME_OBJECT(copy_entry);
4327 }
4328 copy_offset = VME_OFFSET(copy_entry);
4329 copy_size = (copy_entry->vme_end -
4330 copy_entry->vme_start);
4331
4332 /* sanity check */
4333 if ((copy_addr + copy_size) >
4334 (map_addr +
4335 overmap_start + overmap_end +
4336 named_entry->size /* XXX full size */)) {
4337 /* over-mapping too much !? */
4338 kr = KERN_INVALID_ARGUMENT;
4339 DEBUG4K_ERROR("kr 0x%x\n", kr);
4340 /* abort */
4341 break;
4342 }
4343
4344 /* take a reference on the object */
4345 if (copy_entry->is_sub_map) {
4346 vm_map_reference(copy_submap);
4347 } else {
4348 if (!copy &&
4349 copy_object != VM_OBJECT_NULL &&
4350 copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4351 /*
4352 * We need to resolve our side of this
4353 * "symmetric" copy-on-write now; we
4354 * need a new object to map and share,
4355 * instead of the current one which
4356 * might still be shared with the
4357 * original mapping.
4358 *
4359 * Note: A "vm_map_copy_t" does not
4360 * have a lock but we're protected by
4361 * the named entry's lock here.
4362 */
4363 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4364 VME_OBJECT_SHADOW(copy_entry, copy_size, TRUE);
4365 assert(copy_object != VME_OBJECT(copy_entry));
4366 if (!copy_entry->needs_copy &&
4367 copy_entry->protection & VM_PROT_WRITE) {
4368 vm_prot_t prot;
4369
4370 prot = copy_entry->protection & ~VM_PROT_WRITE;
4371 vm_object_pmap_protect(copy_object,
4372 copy_offset,
4373 copy_size,
4374 PMAP_NULL,
4375 PAGE_SIZE,
4376 0,
4377 prot);
4378 }
4379 copy_entry->needs_copy = FALSE;
4380 copy_entry->is_shared = TRUE;
4381 copy_object = VME_OBJECT(copy_entry);
4382 copy_offset = VME_OFFSET(copy_entry);
4383 vm_object_lock(copy_object);
4384 /* we're about to make a shared mapping of this object */
4385 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4386 copy_object->true_share = TRUE;
4387 vm_object_unlock(copy_object);
4388 }
4389
4390 if (copy_object != VM_OBJECT_NULL &&
4391 copy_object->named &&
4392 copy_object->pager != MEMORY_OBJECT_NULL &&
4393 copy_object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4394 memory_object_t pager;
4395 vm_prot_t pager_prot;
4396
4397 /*
4398 * For "named" VM objects, let the pager know that the
4399 * memory object is being mapped. Some pagers need to keep
4400 * track of this, to know when they can reclaim the memory
4401 * object, for example.
4402 * VM calls memory_object_map() for each mapping (specifying
4403 * the protection of each mapping) and calls
4404 * memory_object_last_unmap() when all the mappings are gone.
4405 */
4406 pager_prot = max_protection;
4407 if (copy) {
4408 /*
4409 * Copy-On-Write mapping: won't modify the
4410 * memory object.
4411 */
4412 pager_prot &= ~VM_PROT_WRITE;
4413 }
4414 vm_object_lock(copy_object);
4415 pager = copy_object->pager;
4416 if (copy_object->named &&
4417 pager != MEMORY_OBJECT_NULL &&
4418 copy_object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4419 assert(copy_object->pager_ready);
4420 vm_object_mapping_wait(copy_object, THREAD_UNINT);
4421 vm_object_mapping_begin(copy_object);
4422 vm_object_unlock(copy_object);
4423
4424 kr = memory_object_map(pager, pager_prot);
4425 assert(kr == KERN_SUCCESS);
4426
4427 vm_object_lock(copy_object);
4428 vm_object_mapping_end(copy_object);
4429 }
4430 vm_object_unlock(copy_object);
4431 }
4432
4433 /*
4434 * Perform the copy if requested
4435 */
4436
4437 if (copy && copy_object != VM_OBJECT_NULL) {
4438 vm_object_t new_object;
4439 vm_object_offset_t new_offset;
4440
4441 result = vm_object_copy_strategically(copy_object, copy_offset,
4442 copy_size,
4443 &new_object, &new_offset,
4444 &do_copy);
4445
4446
4447 if (result == KERN_MEMORY_RESTART_COPY) {
4448 boolean_t success;
4449 boolean_t src_needs_copy;
4450
4451 /*
4452 * XXX
4453 * We currently ignore src_needs_copy.
4454 * This really is the issue of how to make
4455 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4456 * non-kernel users to use. Solution forthcoming.
4457 * In the meantime, since we don't allow non-kernel
4458 * memory managers to specify symmetric copy,
4459 * we won't run into problems here.
4460 */
4461 new_object = copy_object;
4462 new_offset = copy_offset;
4463 success = vm_object_copy_quickly(new_object,
4464 new_offset,
4465 copy_size,
4466 &src_needs_copy,
4467 &do_copy);
4468 assert(success);
4469 result = KERN_SUCCESS;
4470 }
4471 if (result != KERN_SUCCESS) {
4472 kr = result;
4473 break;
4474 }
4475
4476 copy_object = new_object;
4477 copy_offset = new_offset;
4478 /*
4479 * No extra object reference for the mapping:
4480 * the mapping should be the only thing keeping
4481 * this new object alive.
4482 */
4483 } else {
4484 /*
4485 * We already have the right object
4486 * to map.
4487 */
4488 copy_object = VME_OBJECT(copy_entry);
4489 /* take an extra ref for the mapping below */
4490 vm_object_reference(copy_object);
4491 }
4492 }
4493
4494 /*
4495 * If the caller does not want a specific
4496 * tag for this new mapping: use
4497 * the tag of the original mapping.
4498 */
4499 vm_map_kernel_flags_t vmk_remap_flags = {
4500 .vmkf_submap = copy_entry->is_sub_map,
4501 };
4502
4503 vm_map_kernel_flags_set_vmflags(&vmk_remap_flags,
4504 vm_map_kernel_flags_vmflags(vmk_flags),
4505 vmk_flags.vm_tag ?: VME_ALIAS(copy_entry));
4506
4507 /* over-map the object into destination */
4508 vmk_remap_flags.vmf_fixed = true;
4509 vmk_remap_flags.vmf_overwrite = true;
4510
4511 if (!copy && !copy_entry->is_sub_map) {
4512 /*
4513 * copy-on-write should have been
4514 * resolved at this point, or we would
4515 * end up sharing instead of copying.
4516 */
4517 assert(!copy_entry->needs_copy);
4518 }
4519 #if XNU_TARGET_OS_OSX
4520 if (copy_entry->used_for_jit) {
4521 vmk_remap_flags.vmkf_map_jit = TRUE;
4522 }
4523 #endif /* XNU_TARGET_OS_OSX */
4524
4525 kr = vm_map_enter(target_map,
4526 &copy_addr,
4527 copy_size,
4528 (vm_map_offset_t) 0,
4529 vmk_remap_flags,
4530 copy_object,
4531 copy_offset,
4532 ((copy_object == NULL)
4533 ? FALSE
4534 : (copy || copy_entry->needs_copy)),
4535 cur_protection,
4536 max_protection,
4537 inheritance);
4538 if (kr != KERN_SUCCESS) {
4539 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4540 if (copy_entry->is_sub_map) {
4541 vm_map_deallocate(copy_submap);
4542 } else {
4543 vm_object_deallocate(copy_object);
4544 }
4545 /* abort */
4546 break;
4547 }
4548
4549 /* next mapping */
4550 copy_addr += copy_size;
4551 }
4552
4553 if (kr == KERN_SUCCESS) {
4554 if (vmk_flags.vmf_return_data_addr ||
4555 vmk_flags.vmf_return_4k_data_addr) {
4556 *address = map_addr + offset_in_mapping;
4557 } else {
4558 *address = map_addr;
4559 }
4560 if (overmap_start) {
4561 *address += overmap_start;
4562 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4563 }
4564 }
4565 named_entry_unlock(named_entry);
4566 if (target_copy_map != copy_map) {
4567 vm_map_copy_discard(target_copy_map);
4568 target_copy_map = VM_MAP_COPY_NULL;
4569 }
4570
4571 if (kr != KERN_SUCCESS && !vmk_flags.vmf_overwrite) {
4572 /* deallocate the contiguous range */
4573 (void) vm_deallocate(target_map,
4574 map_addr,
4575 map_size);
4576 }
4577
4578 return kr;
4579 }
4580
4581 if (named_entry->is_object) {
4582 unsigned int access;
4583 unsigned int wimg_mode;
4584
4585 /* we are mapping a VM object */
4586
4587 access = named_entry->access;
4588
4589 if (vmk_flags.vmf_return_data_addr ||
4590 vmk_flags.vmf_return_4k_data_addr) {
4591 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4592 if (vmk_flags.vmf_return_4k_data_addr) {
4593 offset_in_mapping &= ~((signed)(0xFFF));
4594 }
4595 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4596 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4597 }
4598
4599 object = vm_named_entry_to_vm_object(named_entry);
4600 assert(object != VM_OBJECT_NULL);
4601 vm_object_lock(object);
4602 named_entry_unlock(named_entry);
4603
4604 vm_object_reference_locked(object);
4605
4606 wimg_mode = object->wimg_bits;
4607 vm_prot_to_wimg(access, &wimg_mode);
4608 if (object->wimg_bits != wimg_mode) {
4609 vm_object_change_wimg_mode(object, wimg_mode);
4610 }
4611
4612 vm_object_unlock(object);
4613 } else {
4614 panic("invalid VM named entry %p", named_entry);
4615 }
4616 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4617 /*
4618 * JMM - This is temporary until we unify named entries
4619 * and raw memory objects.
4620 *
4621 * Detected fake ip_kotype for a memory object. In
4622 * this case, the port isn't really a port at all, but
4623 * instead is just a raw memory object.
4624 */
4625 if (vmk_flags.vmf_return_data_addr ||
4626 vmk_flags.vmf_return_4k_data_addr) {
4627 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4628 }
4629
4630 object = memory_object_to_vm_object((memory_object_t)port);
4631 if (object == VM_OBJECT_NULL) {
4632 return KERN_INVALID_OBJECT;
4633 }
4634 vm_object_reference(object);
4635
4636 /* wait for object (if any) to be ready */
4637 if (object != VM_OBJECT_NULL) {
4638 if (object == kernel_object) {
4639 printf("Warning: Attempt to map kernel object"
4640 " by a non-private kernel entity\n");
4641 return KERN_INVALID_OBJECT;
4642 }
4643 if (!object->pager_ready) {
4644 vm_object_lock(object);
4645
4646 while (!object->pager_ready) {
4647 vm_object_wait(object,
4648 VM_OBJECT_EVENT_PAGER_READY,
4649 THREAD_UNINT);
4650 vm_object_lock(object);
4651 }
4652 vm_object_unlock(object);
4653 }
4654 }
4655 } else {
4656 return KERN_INVALID_OBJECT;
4657 }
4658
4659 if (object != VM_OBJECT_NULL &&
4660 object->named &&
4661 object->pager != MEMORY_OBJECT_NULL &&
4662 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4663 memory_object_t pager;
4664 vm_prot_t pager_prot;
4665 kern_return_t kr;
4666
4667 /*
4668 * For "named" VM objects, let the pager know that the
4669 * memory object is being mapped. Some pagers need to keep
4670 * track of this, to know when they can reclaim the memory
4671 * object, for example.
4672 * VM calls memory_object_map() for each mapping (specifying
4673 * the protection of each mapping) and calls
4674 * memory_object_last_unmap() when all the mappings are gone.
4675 */
4676 pager_prot = max_protection;
4677 if (copy) {
4678 /*
4679 * Copy-On-Write mapping: won't modify the
4680 * memory object.
4681 */
4682 pager_prot &= ~VM_PROT_WRITE;
4683 }
4684 vm_object_lock(object);
4685 pager = object->pager;
4686 if (object->named &&
4687 pager != MEMORY_OBJECT_NULL &&
4688 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4689 assert(object->pager_ready);
4690 vm_object_mapping_wait(object, THREAD_UNINT);
4691 vm_object_mapping_begin(object);
4692 vm_object_unlock(object);
4693
4694 kr = memory_object_map(pager, pager_prot);
4695 assert(kr == KERN_SUCCESS);
4696
4697 vm_object_lock(object);
4698 vm_object_mapping_end(object);
4699 }
4700 vm_object_unlock(object);
4701 }
4702
4703 /*
4704 * Perform the copy if requested
4705 */
4706
4707 if (copy) {
4708 vm_object_t new_object;
4709 vm_object_offset_t new_offset;
4710
4711 result = vm_object_copy_strategically(object, offset,
4712 map_size,
4713 &new_object, &new_offset,
4714 &copy);
4715
4716
4717 if (result == KERN_MEMORY_RESTART_COPY) {
4718 boolean_t success;
4719 boolean_t src_needs_copy;
4720
4721 /*
4722 * XXX
4723 * We currently ignore src_needs_copy.
4724 * This really is the issue of how to make
4725 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4726 * non-kernel users to use. Solution forthcoming.
4727 * In the meantime, since we don't allow non-kernel
4728 * memory managers to specify symmetric copy,
4729 * we won't run into problems here.
4730 */
4731 new_object = object;
4732 new_offset = offset;
4733 success = vm_object_copy_quickly(new_object,
4734 new_offset,
4735 map_size,
4736 &src_needs_copy,
4737 &copy);
4738 assert(success);
4739 result = KERN_SUCCESS;
4740 }
4741 /*
4742 * Throw away the reference to the
4743 * original object, as it won't be mapped.
4744 */
4745
4746 vm_object_deallocate(object);
4747
4748 if (result != KERN_SUCCESS) {
4749 return result;
4750 }
4751
4752 object = new_object;
4753 offset = new_offset;
4754 }
4755
4756 /*
4757 * If non-kernel users want to try to prefault pages, the mapping and prefault
4758 * need to be atomic.
4759 */
4760 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4761 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
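/*
 * Descriptive note (sketch): for a user map with a prefault request,
 * vmkf_keep_map_locked keeps the map lock held across vm_map_enter()
 * and the pmap_enter_options() loop below, so the mapping and its
 * prefault appear atomic; the kernel-map path skips this and releases
 * the lock as usual.
 */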
4762
4763 #if __arm64__
4764 if (fourk) {
4765 /* map this object in a "4K" pager */
4766 result = vm_map_enter_fourk(target_map,
4767 &map_addr,
4768 map_size,
4769 (vm_map_offset_t) mask,
4770 vmk_flags,
4771 object,
4772 offset,
4773 copy,
4774 cur_protection,
4775 max_protection,
4776 inheritance);
4777 } else
4778 #endif /* __arm64__ */
4779 {
4780 result = vm_map_enter(target_map,
4781 &map_addr, map_size,
4782 (vm_map_offset_t)mask,
4783 vmk_flags,
4784 object, offset,
4785 copy,
4786 cur_protection, max_protection,
4787 inheritance);
4788 }
4789 if (result != KERN_SUCCESS) {
4790 vm_object_deallocate(object);
4791 }
4792
4793 /*
4794 * Try to prefault, and do not forget to release the vm map lock.
4795 */
4796 if (result == KERN_SUCCESS && try_prefault) {
4797 mach_vm_address_t va = map_addr;
4798 kern_return_t kr = KERN_SUCCESS;
4799 unsigned int i = 0;
4800 int pmap_options;
4801
4802 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4803 if (object->internal) {
4804 pmap_options |= PMAP_OPTIONS_INTERNAL;
4805 }
4806
4807 for (i = 0; i < page_list_count; ++i) {
4808 if (!UPL_VALID_PAGE(page_list, i)) {
4809 if (kernel_prefault) {
4810 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4811 result = KERN_MEMORY_ERROR;
4812 break;
4813 }
4814 } else {
4815 /*
4816 * If this call failed, we should stop
4817 * trying to optimize: later calls are likely
4818 * going to fail too.
4819 *
4820 * We are not going to report an error for such
4821 * a failure, though. That's an optimization, not
4822 * something critical.
4823 */
4824 kr = pmap_enter_options(target_map->pmap,
4825 va, UPL_PHYS_PAGE(page_list, i),
4826 cur_protection, VM_PROT_NONE,
4827 0, TRUE, pmap_options, NULL);
4828 if (kr != KERN_SUCCESS) {
4829 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4830 if (kernel_prefault) {
4831 result = kr;
4832 }
4833 break;
4834 }
4835 OSIncrementAtomic64(&vm_prefault_nb_pages);
4836 }
4837
4838 /* Next virtual address */
4839 va += PAGE_SIZE;
4840 }
4841 if (vmk_flags.vmkf_keep_map_locked) {
4842 vm_map_unlock(target_map);
4843 }
4844 }
4845
4846 if (vmk_flags.vmf_return_data_addr ||
4847 vmk_flags.vmf_return_4k_data_addr) {
4848 *address = map_addr + offset_in_mapping;
4849 } else {
4850 *address = map_addr;
4851 }
4852 return result;
4853 }
4854
4855 kern_return_t
4856 vm_map_enter_mem_object(
4857 vm_map_t target_map,
4858 vm_map_offset_t *address,
4859 vm_map_size_t initial_size,
4860 vm_map_offset_t mask,
4861 vm_map_kernel_flags_t vmk_flags,
4862 ipc_port_t port,
4863 vm_object_offset_t offset,
4864 boolean_t copy,
4865 vm_prot_t cur_protection,
4866 vm_prot_t max_protection,
4867 vm_inherit_t inheritance)
4868 {
4869 kern_return_t ret;
4870
4871 /* range_id is set by vm_map_enter_mem_object_helper */
4872 ret = vm_map_enter_mem_object_helper(target_map,
4873 address,
4874 initial_size,
4875 mask,
4876 vmk_flags,
4877 port,
4878 offset,
4879 copy,
4880 cur_protection,
4881 max_protection,
4882 inheritance,
4883 NULL,
4884 0);
4885
4886 #if KASAN
4887 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4888 kasan_notify_address(*address, initial_size);
4889 }
4890 #endif
4891
4892 return ret;
4893 }
4894
4895 kern_return_t
4896 vm_map_enter_mem_object_prefault(
4897 vm_map_t target_map,
4898 vm_map_offset_t *address,
4899 vm_map_size_t initial_size,
4900 vm_map_offset_t mask,
4901 vm_map_kernel_flags_t vmk_flags,
4902 ipc_port_t port,
4903 vm_object_offset_t offset,
4904 vm_prot_t cur_protection,
4905 vm_prot_t max_protection,
4906 upl_page_list_ptr_t page_list,
4907 unsigned int page_list_count)
4908 {
4909 kern_return_t ret;
4910
4911 /* range_id is set by vm_map_enter_mem_object_helper */
4912 ret = vm_map_enter_mem_object_helper(target_map,
4913 address,
4914 initial_size,
4915 mask,
4916 vmk_flags,
4917 port,
4918 offset,
4919 FALSE,
4920 cur_protection,
4921 max_protection,
4922 VM_INHERIT_DEFAULT,
4923 page_list,
4924 page_list_count);
4925
4926 #if KASAN
4927 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4928 kasan_notify_address(*address, initial_size);
4929 }
4930 #endif
4931
4932 return ret;
4933 }
4934
4935
4936 kern_return_t
4937 vm_map_enter_mem_object_control(
4938 vm_map_t target_map,
4939 vm_map_offset_t *address,
4940 vm_map_size_t initial_size,
4941 vm_map_offset_t mask,
4942 vm_map_kernel_flags_t vmk_flags,
4943 memory_object_control_t control,
4944 vm_object_offset_t offset,
4945 boolean_t copy,
4946 vm_prot_t cur_protection,
4947 vm_prot_t max_protection,
4948 vm_inherit_t inheritance)
4949 {
4950 vm_map_address_t map_addr;
4951 vm_map_size_t map_size;
4952 vm_object_t object;
4953 vm_object_size_t size;
4954 kern_return_t result;
4955 memory_object_t pager;
4956 vm_prot_t pager_prot;
4957 kern_return_t kr;
4958 #if __arm64__
4959 boolean_t fourk = vmk_flags.vmkf_fourk;
4960 #endif /* __arm64__ */
4961
4962 /*
4963 * Check arguments for validity
4964 */
4965 if ((target_map == VM_MAP_NULL) ||
4966 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4967 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4968 (inheritance > VM_INHERIT_LAST_VALID) ||
4969 initial_size == 0) {
4970 return KERN_INVALID_ARGUMENT;
4971 }
4972
4973 #if __arm64__
4974 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
4975 fourk = FALSE;
4976 }
4977
4978 if (fourk) {
4979 map_addr = vm_map_trunc_page(*address,
4980 FOURK_PAGE_MASK);
4981 map_size = vm_map_round_page(initial_size,
4982 FOURK_PAGE_MASK);
4983 } else
4984 #endif /* __arm64__ */
4985 {
4986 map_addr = vm_map_trunc_page(*address,
4987 VM_MAP_PAGE_MASK(target_map));
4988 map_size = vm_map_round_page(initial_size,
4989 VM_MAP_PAGE_MASK(target_map));
4990 }
4991 size = vm_object_round_page(initial_size);
4992
4993 object = memory_object_control_to_vm_object(control);
4994
4995 if (object == VM_OBJECT_NULL) {
4996 return KERN_INVALID_OBJECT;
4997 }
4998
4999 if (object == kernel_object) {
5000 printf("Warning: Attempt to map kernel object"
5001 " by a non-private kernel entity\n");
5002 return KERN_INVALID_OBJECT;
5003 }
5004
5005 vm_object_lock(object);
5006 object->ref_count++;
5007
5008 /*
5009 * For "named" VM objects, let the pager know that the
5010 * memory object is being mapped. Some pagers need to keep
5011 * track of this, to know when they can reclaim the memory
5012 * object, for example.
5013 * VM calls memory_object_map() for each mapping (specifying
5014 * the protection of each mapping) and calls
5015 * memory_object_last_unmap() when all the mappings are gone.
5016 */
5017 pager_prot = max_protection;
5018 if (copy) {
5019 pager_prot &= ~VM_PROT_WRITE;
5020 }
5021 pager = object->pager;
5022 if (object->named &&
5023 pager != MEMORY_OBJECT_NULL &&
5024 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5025 assert(object->pager_ready);
5026 vm_object_mapping_wait(object, THREAD_UNINT);
5027 vm_object_mapping_begin(object);
5028 vm_object_unlock(object);
5029
5030 kr = memory_object_map(pager, pager_prot);
5031 assert(kr == KERN_SUCCESS);
5032
5033 vm_object_lock(object);
5034 vm_object_mapping_end(object);
5035 }
5036 vm_object_unlock(object);
5037
5038 /*
5039 * Perform the copy if requested
5040 */
5041
5042 if (copy) {
5043 vm_object_t new_object;
5044 vm_object_offset_t new_offset;
5045
5046 result = vm_object_copy_strategically(object, offset, size,
5047 &new_object, &new_offset,
5048 &copy);
5049
5050
5051 if (result == KERN_MEMORY_RESTART_COPY) {
5052 boolean_t success;
5053 boolean_t src_needs_copy;
5054
5055 /*
5056 * XXX
5057 * We currently ignore src_needs_copy.
5058 * This really is the issue of how to make
5059 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5060 * non-kernel users to use. Solution forthcoming.
5061 * In the meantime, since we don't allow non-kernel
5062 * memory managers to specify symmetric copy,
5063 * we won't run into problems here.
5064 */
5065 new_object = object;
5066 new_offset = offset;
5067 success = vm_object_copy_quickly(new_object,
5068 new_offset, size,
5069 &src_needs_copy,
5070 &copy);
5071 assert(success);
5072 result = KERN_SUCCESS;
5073 }
5074 /*
5075 * Throw away the reference to the
5076 * original object, as it won't be mapped.
5077 */
5078
5079 vm_object_deallocate(object);
5080
5081 if (result != KERN_SUCCESS) {
5082 return result;
5083 }
5084
5085 object = new_object;
5086 offset = new_offset;
5087 }
5088
5089 #if __arm64__
5090 if (fourk) {
5091 result = vm_map_enter_fourk(target_map,
5092 &map_addr,
5093 map_size,
5094 (vm_map_offset_t)mask,
5095 vmk_flags,
5096 object, offset,
5097 copy,
5098 cur_protection, max_protection,
5099 inheritance);
5100 } else
5101 #endif /* __arm64__ */
5102 {
5103 result = vm_map_enter(target_map,
5104 &map_addr, map_size,
5105 (vm_map_offset_t)mask,
5106 vmk_flags,
5107 object, offset,
5108 copy,
5109 cur_protection, max_protection,
5110 inheritance);
5111 }
5112 if (result != KERN_SUCCESS) {
5113 vm_object_deallocate(object);
5114 }
5115 *address = map_addr;
5116
5117 return result;
5118 }
5119
5120
5121 #if VM_CPM
5122
5123 #ifdef MACH_ASSERT
5124 extern pmap_paddr_t avail_start, avail_end;
5125 #endif
5126
5127 /*
5128 * Allocate memory in the specified map, with the caveat that
5129 * the memory is physically contiguous. This call may fail
5130 * if the system can't find sufficient contiguous memory.
5131 * This call may cause or lead to heart-stopping amounts of
5132 * paging activity.
5133 *
5134 * Memory obtained from this call should be freed in the
5135 * normal way, viz., via vm_deallocate.
5136 */
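/*
 * Illustrative usage (a minimal sketch; "addr" and "size" are
 * hypothetical values, error handling elided):
 *
 *	vm_map_offset_t addr = 0;
 *	vm_map_size_t size = 4 * PAGE_SIZE;
 *	kern_return_t kr = vm_map_enter_cpm(map, &addr, size, vmk_flags);
 *	if (kr == KERN_SUCCESS) {
 *		// ... use the physically contiguous range at "addr" ...
 *		(void) vm_deallocate(map, addr, size);
 *	}
 */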
5137 kern_return_t
5138 vm_map_enter_cpm(
5139 vm_map_t map,
5140 vm_map_offset_t *addr,
5141 vm_map_size_t size,
5142 vm_map_kernel_flags_t vmk_flags)
5143 {
5144 vm_object_t cpm_obj;
5145 pmap_t pmap;
5146 vm_page_t m, pages;
5147 kern_return_t kr;
5148 vm_map_offset_t va, start, end, offset;
5149 #if MACH_ASSERT
5150 vm_map_offset_t prev_addr = 0;
5151 #endif /* MACH_ASSERT */
5152
5153 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5154 /* XXX TODO4K do we need to support this? */
5155 *addr = 0;
5156 return KERN_NOT_SUPPORTED;
5157 }
5158
5159 if (size == 0) {
5160 *addr = 0;
5161 return KERN_SUCCESS;
5162 }
5163 if (vmk_flags.vmf_fixed) {
5164 *addr = vm_map_trunc_page(*addr,
5165 VM_MAP_PAGE_MASK(map));
5166 } else {
5167 *addr = vm_map_min(map);
5168 }
5169 size = vm_map_round_page(size,
5170 VM_MAP_PAGE_MASK(map));
5171
5172 /*
5173 * LP64todo - cpm_allocate should probably allow
5174 * allocations of >4GB, but not with the current
5175 * algorithm, so just cast down the size for now.
5176 */
5177 if (size > VM_MAX_ADDRESS) {
5178 return KERN_RESOURCE_SHORTAGE;
5179 }
5180 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5181 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5182 return kr;
5183 }
5184
5185 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5186 assert(cpm_obj != VM_OBJECT_NULL);
5187 assert(cpm_obj->internal);
5188 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5189 assert(cpm_obj->can_persist == FALSE);
5190 assert(cpm_obj->pager_created == FALSE);
5191 assert(cpm_obj->pageout == FALSE);
5192 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5193
5194 /*
5195 * Insert pages into object.
5196 */
5197
5198 vm_object_lock(cpm_obj);
5199 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5200 m = pages;
5201 pages = NEXT_PAGE(m);
5202 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5203
5204 assert(!m->vmp_gobbled);
5205 assert(!m->vmp_wanted);
5206 assert(!m->vmp_pageout);
5207 assert(!m->vmp_tabled);
5208 assert(VM_PAGE_WIRED(m));
5209 assert(m->vmp_busy);
5210 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5211
5212 m->vmp_busy = FALSE;
5213 vm_page_insert(m, cpm_obj, offset);
5214 }
5215 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5216 vm_object_unlock(cpm_obj);
5217
5218 /*
5219 * Hang onto a reference on the object in case a
5220 * multi-threaded application for some reason decides
5221 * to deallocate the portion of the address space into
5222 * which we will insert this object.
5223 *
5224 * Unfortunately, we must insert the object now before
5225 * we can talk to the pmap module about which addresses
5226 * must be wired down. Hence, the race with a multi-
5227 * threaded app.
5228 */
5229 vm_object_reference(cpm_obj);
5230
5231 /*
5232 * Insert object into map.
5233 */
5234
5235 kr = vm_map_enter(
5236 map,
5237 addr,
5238 size,
5239 (vm_map_offset_t)0,
5240 vmk_flags,
5241 cpm_obj,
5242 (vm_object_offset_t)0,
5243 FALSE,
5244 VM_PROT_ALL,
5245 VM_PROT_ALL,
5246 VM_INHERIT_DEFAULT);
5247
5248 if (kr != KERN_SUCCESS) {
5249 /*
5250 * A CPM object doesn't have can_persist set,
5251 * so all we have to do is deallocate it to
5252 * free up these pages.
5253 */
5254 assert(cpm_obj->pager_created == FALSE);
5255 assert(cpm_obj->can_persist == FALSE);
5256 assert(cpm_obj->pageout == FALSE);
5257 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5258 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5259 vm_object_deallocate(cpm_obj); /* kill creation ref */
5260 }
5261
5262 /*
5263 * Inform the physical mapping system that the
5264 * range of addresses may not fault, so that
5265 * page tables and such can be locked down as well.
5266 */
5267 start = *addr;
5268 end = start + size;
5269 pmap = vm_map_pmap(map);
5270 pmap_pageable(pmap, start, end, FALSE);
5271
5272 /*
5273 * Enter each page into the pmap, to avoid faults.
5274 * Note that this loop could be coded more efficiently,
5275 * if the need arose, rather than looking up each page
5276 * again.
5277 */
5278 for (offset = 0, va = start; offset < size;
5279 va += PAGE_SIZE, offset += PAGE_SIZE) {
5280 int type_of_fault;
5281
5282 vm_object_lock(cpm_obj);
5283 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5284 assert(m != VM_PAGE_NULL);
5285
5286 vm_page_zero_fill(m);
5287
5288 type_of_fault = DBG_ZERO_FILL_FAULT;
5289
5290 vm_fault_enter(m, pmap, va,
5291 PAGE_SIZE, 0,
5292 VM_PROT_ALL, VM_PROT_WRITE,
5293 VM_PAGE_WIRED(m),
5294 FALSE, /* change_wiring */
5295 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5296 FALSE, /* cs_bypass */
5297 0, /* user_tag */
5298 0, /* pmap_options */
5299 NULL, /* need_retry */
5300 &type_of_fault);
5301
5302 vm_object_unlock(cpm_obj);
5303 }
5304
5305 #if MACH_ASSERT
5306 /*
5307 * Verify ordering in address space.
5308 */
5309 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5310 vm_object_lock(cpm_obj);
5311 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5312 vm_object_unlock(cpm_obj);
5313 if (m == VM_PAGE_NULL) {
5314 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5315 cpm_obj, (uint64_t)offset);
5316 }
5317 assert(m->vmp_tabled);
5318 assert(!m->vmp_busy);
5319 assert(!m->vmp_wanted);
5320 assert(!m->vmp_fictitious);
5321 assert(!m->vmp_private);
5322 assert(!m->vmp_absent);
5323 assert(!m->vmp_cleaning);
5324 assert(!m->vmp_laundry);
5325 assert(!m->vmp_precious);
5326 assert(!m->vmp_clustered);
5327 if (offset != 0) {
5328 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5329 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5330 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5331 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5332 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5333 panic("vm_allocate_cpm: pages not contig!");
5334 }
5335 }
5336 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5337 }
5338 #endif /* MACH_ASSERT */
5339
5340 vm_object_deallocate(cpm_obj); /* kill extra ref */
5341
5342 return kr;
5343 }
5344
5345
5346 #else /* VM_CPM */
5347
5348 /*
5349 * Interface is defined in all cases, but unless the kernel
5350 * is built explicitly for this option, the interface does
5351 * nothing.
5352 */
5353
5354 kern_return_t
5355 vm_map_enter_cpm(
5356 __unused vm_map_t map,
5357 __unused vm_map_offset_t *addr,
5358 __unused vm_map_size_t size,
5359 __unused vm_map_kernel_flags_t vmk_flags)
5360 {
5361 return KERN_FAILURE;
5362 }
5363 #endif /* VM_CPM */
5364
5365 /* Not used without nested pmaps */
5366 #ifndef NO_NESTED_PMAP
5367 /*
5368 * Clip and unnest a portion of a nested submap mapping.
5369 */
5370
5371
5372 static void
5373 vm_map_clip_unnest(
5374 vm_map_t map,
5375 vm_map_entry_t entry,
5376 vm_map_offset_t start_unnest,
5377 vm_map_offset_t end_unnest)
5378 {
5379 vm_map_offset_t old_start_unnest = start_unnest;
5380 vm_map_offset_t old_end_unnest = end_unnest;
5381
5382 assert(entry->is_sub_map);
5383 assert(VME_SUBMAP(entry) != NULL);
5384 assert(entry->use_pmap);
5385
5386 /*
5387 * Query the platform for the optimal unnest range.
5388 * DRK: There's some duplication of effort here, since
5389 * callers may have adjusted the range to some extent. This
5390 * routine was introduced to support 1GiB subtree nesting
5391 * for x86 platforms, which can also nest on 2MiB boundaries
5392 * depending on size/alignment.
5393 */
5394 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5395 assert(VME_SUBMAP(entry)->is_nested_map);
5396 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5397 log_unnest_badness(map,
5398 old_start_unnest,
5399 old_end_unnest,
5400 VME_SUBMAP(entry)->is_nested_map,
5401 (entry->vme_start +
5402 VME_SUBMAP(entry)->lowest_unnestable_start -
5403 VME_OFFSET(entry)));
5404 }
5405
5406 if (entry->vme_start > start_unnest ||
5407 entry->vme_end < end_unnest) {
5408 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5409 "bad nested entry: start=0x%llx end=0x%llx\n",
5410 (long long)start_unnest, (long long)end_unnest,
5411 (long long)entry->vme_start, (long long)entry->vme_end);
5412 }
5413
5414 if (start_unnest > entry->vme_start) {
5415 _vm_map_clip_start(&map->hdr,
5416 entry,
5417 start_unnest);
5418 if (map->holelistenabled) {
5419 vm_map_store_update_first_free(map, NULL, FALSE);
5420 } else {
5421 vm_map_store_update_first_free(map, map->first_free, FALSE);
5422 }
5423 }
5424 if (entry->vme_end > end_unnest) {
5425 _vm_map_clip_end(&map->hdr,
5426 entry,
5427 end_unnest);
5428 if (map->holelistenabled) {
5429 vm_map_store_update_first_free(map, NULL, FALSE);
5430 } else {
5431 vm_map_store_update_first_free(map, map->first_free, FALSE);
5432 }
5433 }
5434
5435 pmap_unnest(map->pmap,
5436 entry->vme_start,
5437 entry->vme_end - entry->vme_start);
5438 if ((map->mapped_in_other_pmaps) && os_ref_get_count_raw(&map->map_refcnt) != 0) {
5439 /* clean up parent map/maps */
5440 vm_map_submap_pmap_clean(
5441 map, entry->vme_start,
5442 entry->vme_end,
5443 VME_SUBMAP(entry),
5444 VME_OFFSET(entry));
5445 }
5446 entry->use_pmap = FALSE;
5447 if ((map->pmap != kernel_pmap) &&
5448 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5449 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5450 }
5451 }
5452 #endif /* NO_NESTED_PMAP */
5453
5454 __abortlike
5455 static void
5456 __vm_map_clip_atomic_entry_panic(
5457 vm_map_t map,
5458 vm_map_entry_t entry,
5459 vm_map_offset_t where)
5460 {
5461 panic("vm_map_clip(%p): Attempting to clip an atomic VM map entry "
5462 "%p [0x%llx:0x%llx] at 0x%llx", map, entry,
5463 (uint64_t)entry->vme_start,
5464 (uint64_t)entry->vme_end,
5465 (uint64_t)where);
5466 }
5467
5468 /*
5469 * vm_map_clip_start: [ internal use only ]
5470 *
5471 * Asserts that the given entry begins at or after
5472 * the specified address; if necessary,
5473 * it splits the entry into two.
5474 */
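/*
 * Worked example (illustrative addresses): clipping an entry
 * [0x1000, 0x5000) at startaddr 0x3000 leaves this entry as
 * [0x3000, 0x5000) and links a new entry [0x1000, 0x3000) in front of
 * it, so the caller can then operate on a range that starts exactly at
 * "startaddr".
 */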
5475 void
5476 vm_map_clip_start(
5477 vm_map_t map,
5478 vm_map_entry_t entry,
5479 vm_map_offset_t startaddr)
5480 {
5481 #ifndef NO_NESTED_PMAP
5482 if (entry->is_sub_map &&
5483 entry->use_pmap &&
5484 startaddr >= entry->vme_start) {
5485 vm_map_offset_t start_unnest, end_unnest;
5486
5487 /*
5488 * Make sure "startaddr" is no longer in a nested range
5489 * before we clip. Unnest only the minimum range the platform
5490 * can handle.
5491 * vm_map_clip_unnest may perform additional adjustments to
5492 * the unnest range.
5493 */
5494 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5495 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5496 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5497 }
5498 #endif /* NO_NESTED_PMAP */
5499 if (startaddr > entry->vme_start) {
5500 if (!entry->is_sub_map &&
5501 VME_OBJECT(entry) &&
5502 VME_OBJECT(entry)->phys_contiguous) {
5503 pmap_remove(map->pmap,
5504 (addr64_t)(entry->vme_start),
5505 (addr64_t)(entry->vme_end));
5506 }
5507 if (entry->vme_atomic) {
5508 __vm_map_clip_atomic_entry_panic(map, entry, startaddr);
5509 }
5510
5511 DTRACE_VM5(
5512 vm_map_clip_start,
5513 vm_map_t, map,
5514 vm_map_offset_t, entry->vme_start,
5515 vm_map_offset_t, entry->vme_end,
5516 vm_map_offset_t, startaddr,
5517 int, VME_ALIAS(entry));
5518
5519 _vm_map_clip_start(&map->hdr, entry, startaddr);
5520 if (map->holelistenabled) {
5521 vm_map_store_update_first_free(map, NULL, FALSE);
5522 } else {
5523 vm_map_store_update_first_free(map, map->first_free, FALSE);
5524 }
5525 }
5526 }
5527
5528
5529 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5530 MACRO_BEGIN \
5531 if ((startaddr) > (entry)->vme_start) \
5532 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5533 MACRO_END
5534
5535 /*
5536 * This routine is called only when it is known that
5537 * the entry must be split.
5538 */
5539 static void
5540 _vm_map_clip_start(
5541 struct vm_map_header *map_header,
5542 vm_map_entry_t entry,
5543 vm_map_offset_t start)
5544 {
5545 vm_map_entry_t new_entry;
5546
5547 /*
5548 * Split off the front portion --
5549 * note that we must insert the new
5550 * entry BEFORE this one, so that
5551 * this entry has the specified starting
5552 * address.
5553 */
5554
5555 if (entry->map_aligned) {
5556 assert(VM_MAP_PAGE_ALIGNED(start,
5557 VM_MAP_HDR_PAGE_MASK(map_header)));
5558 }
5559
5560 new_entry = _vm_map_entry_create(map_header);
5561 vm_map_entry_copy_full(new_entry, entry);
5562
5563 new_entry->vme_end = start;
5564 assert(new_entry->vme_start < new_entry->vme_end);
5565 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5566 if (__improbable(start >= entry->vme_end)) {
5567 panic("mapHdr %p entry %p start 0x%llx end 0x%llx new start 0x%llx", map_header, entry, entry->vme_start, entry->vme_end, start);
5568 }
5569 assert(start < entry->vme_end);
5570 entry->vme_start = start;
5571
5572 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5573
5574 if (entry->is_sub_map) {
5575 vm_map_reference(VME_SUBMAP(new_entry));
5576 } else {
5577 vm_object_reference(VME_OBJECT(new_entry));
5578 }
5579 }
5580
5581
5582 /*
5583 * vm_map_clip_end: [ internal use only ]
5584 *
5585 * Asserts that the given entry ends at or before
5586 * the specified address; if necessary,
5587 * it splits the entry into two.
5588 */
5589 void
5590 vm_map_clip_end(
5591 vm_map_t map,
5592 vm_map_entry_t entry,
5593 vm_map_offset_t endaddr)
5594 {
5595 if (endaddr > entry->vme_end) {
5596 /*
5597 * Within the scope of this clipping, limit "endaddr" to
5598 * the end of this map entry...
5599 */
5600 endaddr = entry->vme_end;
5601 }
5602 #ifndef NO_NESTED_PMAP
5603 if (entry->is_sub_map && entry->use_pmap) {
5604 vm_map_offset_t start_unnest, end_unnest;
5605
5606 /*
5607 * Make sure the range between the start of this entry and
5608 * the new "endaddr" is no longer nested before we clip.
5609 * Unnest only the minimum range the platform can handle.
5610 * vm_map_clip_unnest may perform additional adjustments to
5611 * the unnest range.
5612 */
5613 start_unnest = entry->vme_start;
5614 end_unnest =
5615 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5616 ~(pmap_shared_region_size_min(map->pmap) - 1);
5617 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5618 }
5619 #endif /* NO_NESTED_PMAP */
5620 if (endaddr < entry->vme_end) {
5621 if (!entry->is_sub_map &&
5622 VME_OBJECT(entry) &&
5623 VME_OBJECT(entry)->phys_contiguous) {
5624 pmap_remove(map->pmap,
5625 (addr64_t)(entry->vme_start),
5626 (addr64_t)(entry->vme_end));
5627 }
5628 if (entry->vme_atomic) {
5629 __vm_map_clip_atomic_entry_panic(map, entry, endaddr);
5630 }
5631 DTRACE_VM5(
5632 vm_map_clip_end,
5633 vm_map_t, map,
5634 vm_map_offset_t, entry->vme_start,
5635 vm_map_offset_t, entry->vme_end,
5636 vm_map_offset_t, endaddr,
5637 int, VME_ALIAS(entry));
5638
5639 _vm_map_clip_end(&map->hdr, entry, endaddr);
5640 if (map->holelistenabled) {
5641 vm_map_store_update_first_free(map, NULL, FALSE);
5642 } else {
5643 vm_map_store_update_first_free(map, map->first_free, FALSE);
5644 }
5645 }
5646 }
5647
5648
5649 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5650 MACRO_BEGIN \
5651 if ((endaddr) < (entry)->vme_end) \
5652 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5653 MACRO_END
5654
5655 /*
5656 * This routine is called only when it is known that
5657 * the entry must be split.
5658 */
5659 static void
5660 _vm_map_clip_end(
5661 struct vm_map_header *map_header,
5662 vm_map_entry_t entry,
5663 vm_map_offset_t end)
5664 {
5665 vm_map_entry_t new_entry;
5666
5667 /*
5668 * Create a new entry and insert it
5669 * AFTER the specified entry
5670 */
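/*
 * For example, splitting an entry that covers [0x1000, 0x5000) at
 * "end" 0x3000 trims this entry to [0x1000, 0x3000) and links a new
 * entry for [0x3000, 0x5000) after it, with the new entry's object
 * offset advanced by 0x2000.
 */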
5671
5672 if (entry->map_aligned) {
5673 assert(VM_MAP_PAGE_ALIGNED(end,
5674 VM_MAP_HDR_PAGE_MASK(map_header)));
5675 }
5676
5677 new_entry = _vm_map_entry_create(map_header);
5678 vm_map_entry_copy_full(new_entry, entry);
5679
5680 if (__improbable(end <= entry->vme_start)) {
5681 panic("mapHdr %p entry %p start 0x%llx end 0x%llx new end 0x%llx", map_header, entry, entry->vme_start, entry->vme_end, end);
5682 }
5683 assert(entry->vme_start < end);
5684 new_entry->vme_start = entry->vme_end = end;
5685 VME_OFFSET_SET(new_entry,
5686 VME_OFFSET(new_entry) + (end - entry->vme_start));
5687 assert(new_entry->vme_start < new_entry->vme_end);
5688
5689 _vm_map_store_entry_link(map_header, entry, new_entry);
5690
5691 if (entry->is_sub_map) {
5692 vm_map_reference(VME_SUBMAP(new_entry));
5693 } else {
5694 vm_object_reference(VME_OBJECT(new_entry));
5695 }
5696 }
5697
5698
5699 /*
5700 * VM_MAP_RANGE_CHECK: [ internal use only ]
5701 *
5702 * Asserts that the starting and ending region
5703 * addresses fall within the valid range of the map.
5704 */
5705 #define VM_MAP_RANGE_CHECK(map, start, end) \
5706 MACRO_BEGIN \
5707 if (start < vm_map_min(map)) \
5708 start = vm_map_min(map); \
5709 if (end > vm_map_max(map)) \
5710 end = vm_map_max(map); \
5711 if (start > end) \
5712 start = end; \
5713 MACRO_END
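/*
 * For example, with vm_map_min(map) == 0x1000 and vm_map_max(map) ==
 * 0xF000, a caller-supplied range [0x0, 0x10000) is clamped in place
 * to [0x1000, 0xF000); a range lying entirely outside the map
 * degenerates to an empty range with start == end.
 */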
5714
5715 /*
5716 * vm_map_range_check: [ internal use only ]
5717 *
5718 * Check that the region defined by the specified start and
5719 * end addresses is wholly contained within a single map
5720 * entry or set of adjacent map entries of the specified map,
5721 * i.e. the specified region contains no unmapped space.
5722 * If any or all of the region is unmapped, FALSE is returned.
5723 * Otherwise, TRUE is returned and if the output argument 'entry'
5724 * is not NULL it points to the map entry containing the start
5725 * of the region.
5726 *
5727 * The map is locked for reading on entry and is left locked.
5728 */
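/*
 * For example, adjacent entries [0x1000, 0x3000) and [0x3000, 0x6000)
 * satisfy a check of [0x2000, 0x5000), but a gap between them at
 * 0x3000 would make the same check return FALSE.
 */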
5729 static boolean_t
5730 vm_map_range_check(
5731 vm_map_t map,
5732 vm_map_offset_t start,
5733 vm_map_offset_t end,
5734 vm_map_entry_t *entry)
5735 {
5736 vm_map_entry_t cur;
5737 vm_map_offset_t prev;
5738
5739 /*
5740 * Basic sanity checks first
5741 */
5742 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5743 return FALSE;
5744 }
5745
5746 /*
5747 * Check first if the region starts within a valid
5748 * mapping for the map.
5749 */
5750 if (!vm_map_lookup_entry(map, start, &cur)) {
5751 return FALSE;
5752 }
5753
5754 /*
5755 * Optimize for the case that the region is contained
5756 * in a single map entry.
5757 */
5758 if (entry != (vm_map_entry_t *) NULL) {
5759 *entry = cur;
5760 }
5761 if (end <= cur->vme_end) {
5762 return TRUE;
5763 }
5764
5765 /*
5766 * If the region is not wholly contained within a
5767 * single entry, walk the entries looking for holes.
5768 */
5769 prev = cur->vme_end;
5770 cur = cur->vme_next;
5771 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5772 if (end <= cur->vme_end) {
5773 return TRUE;
5774 }
5775 prev = cur->vme_end;
5776 cur = cur->vme_next;
5777 }
5778 return FALSE;
5779 }
5780
5781 /*
5782 * vm_map_protect:
5783 *
5784 * Sets the protection of the specified address
5785 * region in the target map. If "set_max" is
5786 * specified, the maximum protection is to be set;
5787 * otherwise, only the current protection is affected.
5788 */
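/*
 * If "new_prot" includes VM_PROT_COPY, the range is first remapped
 * copy-on-write over itself (see the vm_map_remap() call below), which
 * is how callers typically obtain a private, writable copy of shared
 * memory before the remaining protection bits are applied.
 */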
5789 kern_return_t
5790 vm_map_protect(
5791 vm_map_t map,
5792 vm_map_offset_t start,
5793 vm_map_offset_t end,
5794 vm_prot_t new_prot,
5795 boolean_t set_max)
5796 {
5797 vm_map_entry_t current;
5798 vm_map_offset_t prev;
5799 vm_map_entry_t entry;
5800 vm_prot_t new_max;
5801 int pmap_options = 0;
5802 kern_return_t kr;
5803
5804 if (vm_map_range_overflows(map, start, end - start)) {
5805 return KERN_INVALID_ARGUMENT;
5806 }
5807
5808 if (new_prot & VM_PROT_COPY) {
5809 vm_map_offset_t new_start;
5810 vm_prot_t cur_prot, max_prot;
5811 vm_map_kernel_flags_t kflags;
5812
5813 /* LP64todo - see below */
5814 if (start >= map->max_offset) {
5815 return KERN_INVALID_ADDRESS;
5816 }
5817
5818 if ((new_prot & VM_PROT_ALLEXEC) &&
5819 map->pmap != kernel_pmap &&
5820 (vm_map_cs_enforcement(map)
5821 #if XNU_TARGET_OS_OSX && __arm64__
5822 || !VM_MAP_IS_EXOTIC(map)
5823 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
5824 ) &&
5825 VM_MAP_POLICY_WX_FAIL(map)) {
5826 DTRACE_VM3(cs_wx,
5827 uint64_t, (uint64_t) start,
5828 uint64_t, (uint64_t) end,
5829 vm_prot_t, new_prot);
5830 printf("CODE SIGNING: %d[%s] %s:%d(0x%llx,0x%llx,0x%x) can't have both write and exec at the same time\n",
5831 proc_selfpid(),
5832 (get_bsdtask_info(current_task())
5833 ? proc_name_address(get_bsdtask_info(current_task()))
5834 : "?"),
5835 __FUNCTION__, __LINE__,
5836 #if DEVELOPMENT || DEBUG
5837 (uint64_t)start,
5838 (uint64_t)end,
5839 #else /* DEVELOPMENT || DEBUG */
5840 (uint64_t)0,
5841 (uint64_t)0,
5842 #endif /* DEVELOPMENT || DEBUG */
5843 new_prot);
5844 return KERN_PROTECTION_FAILURE;
5845 }
5846
5847 /*
5848 * Let vm_map_remap_extract() know that it will need to:
5849 * + make a copy of the mapping
5850 * + add VM_PROT_WRITE to the max protections
5851 * + remove any protections that are no longer allowed from the
5852 * max protections (to avoid any WRITE/EXECUTE conflict, for
5853 * example).
5854 * Note that "max_prot" is an IN/OUT parameter only for this
5855 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5856 * only.
5857 */
5858 max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
5859 cur_prot = VM_PROT_NONE;
5860 kflags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
5861 kflags.vmkf_remap_prot_copy = true;
5862 new_start = start;
5863 kr = vm_map_remap(map,
5864 &new_start,
5865 end - start,
5866 0, /* mask */
5867 kflags,
5868 map,
5869 start,
5870 TRUE, /* copy-on-write remapping! */
5871 &cur_prot, /* IN/OUT */
5872 &max_prot, /* IN/OUT */
5873 VM_INHERIT_DEFAULT);
5874 if (kr != KERN_SUCCESS) {
5875 return kr;
5876 }
5877 new_prot &= ~VM_PROT_COPY;
5878 }
5879
5880 vm_map_lock(map);
5881
5882 /* LP64todo - remove this check when vm_map_commpage64()
5883 * no longer has to stuff in a map_entry for the commpage
5884 * above the map's max_offset.
5885 */
5886 if (start >= map->max_offset) {
5887 vm_map_unlock(map);
5888 return KERN_INVALID_ADDRESS;
5889 }
5890
5891 while (1) {
5892 /*
5893 * Lookup the entry. If it doesn't start in a valid
5894 * entry, return an error.
5895 */
5896 if (!vm_map_lookup_entry(map, start, &entry)) {
5897 vm_map_unlock(map);
5898 return KERN_INVALID_ADDRESS;
5899 }
5900
5901 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5902 start = SUPERPAGE_ROUND_DOWN(start);
5903 continue;
5904 }
5905 break;
5906 }
5907 if (entry->superpage_size) {
5908 end = SUPERPAGE_ROUND_UP(end);
5909 }
5910
5911 /*
5912 * Make a first pass to check for protection and address
5913 * violations.
5914 */
5915
5916 current = entry;
5917 prev = current->vme_start;
5918 while ((current != vm_map_to_entry(map)) &&
5919 (current->vme_start < end)) {
5920 /*
5921 * If there is a hole, return an error.
5922 */
5923 if (current->vme_start != prev) {
5924 vm_map_unlock(map);
5925 return KERN_INVALID_ADDRESS;
5926 }
5927
5928 new_max = current->max_protection;
5929
5930 #if defined(__x86_64__)
5931 /* Allow max mask to include execute prot bits if this map doesn't enforce CS */
5932 if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
5933 new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
5934 }
5935 #elif CODE_SIGNING_MONITOR
5936 if (set_max && (new_prot & VM_PROT_EXECUTE) && (csm_address_space_exempt(map->pmap) == KERN_SUCCESS)) {
5937 new_max |= VM_PROT_EXECUTE;
5938 }
5939 #endif
5940 if ((new_prot & new_max) != new_prot) {
5941 vm_map_unlock(map);
5942 return KERN_PROTECTION_FAILURE;
5943 }
5944
5945 if (current->used_for_jit &&
5946 pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
5947 vm_map_unlock(map);
5948 return KERN_PROTECTION_FAILURE;
5949 }
5950
5951 #if __arm64e__
5952 /* Disallow remapping hw assisted TPRO mappings */
5953 if (current->used_for_tpro) {
5954 vm_map_unlock(map);
5955 return KERN_PROTECTION_FAILURE;
5956 }
5957 #endif /* __arm64e__ */
5958
5959
5960 if ((new_prot & VM_PROT_WRITE) &&
5961 (new_prot & VM_PROT_ALLEXEC) &&
5962 #if XNU_TARGET_OS_OSX
5963 map->pmap != kernel_pmap &&
5964 (vm_map_cs_enforcement(map)
5965 #if __arm64__
5966 || !VM_MAP_IS_EXOTIC(map)
5967 #endif /* __arm64__ */
5968 ) &&
5969 #endif /* XNU_TARGET_OS_OSX */
5970 #if CODE_SIGNING_MONITOR
5971 (csm_address_space_exempt(map->pmap) != KERN_SUCCESS) &&
5972 #endif
5973 !(current->used_for_jit)) {
5974 DTRACE_VM3(cs_wx,
5975 uint64_t, (uint64_t) current->vme_start,
5976 uint64_t, (uint64_t) current->vme_end,
5977 vm_prot_t, new_prot);
5978 printf("CODE SIGNING: %d[%s] %s:%d(0x%llx,0x%llx,0x%x) can't have both write and exec at the same time\n",
5979 proc_selfpid(),
5980 (get_bsdtask_info(current_task())
5981 ? proc_name_address(get_bsdtask_info(current_task()))
5982 : "?"),
5983 __FUNCTION__, __LINE__,
5984 #if DEVELOPMENT || DEBUG
5985 (uint64_t)current->vme_start,
5986 (uint64_t)current->vme_end,
5987 #else /* DEVELOPMENT || DEBUG */
5988 (uint64_t)0,
5989 (uint64_t)0,
5990 #endif /* DEVELOPMENT || DEBUG */
5991 new_prot);
5992 new_prot &= ~VM_PROT_ALLEXEC;
5993 if (VM_MAP_POLICY_WX_FAIL(map)) {
5994 vm_map_unlock(map);
5995 return KERN_PROTECTION_FAILURE;
5996 }
5997 }
5998
5999 /*
6000 * If the task has requested executable lockdown,
6001 * deny both:
6002 * - adding executable protections OR
6003 * - adding write protections to an existing executable mapping.
6004 */
6005 if (map->map_disallow_new_exec == TRUE) {
6006 if ((new_prot & VM_PROT_ALLEXEC) ||
6007 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6008 vm_map_unlock(map);
6009 return KERN_PROTECTION_FAILURE;
6010 }
6011 }
6012
6013 prev = current->vme_end;
6014 current = current->vme_next;
6015 }
6016
6017 #if __arm64__
6018 if (end > prev &&
6019 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6020 vm_map_entry_t prev_entry;
6021
6022 prev_entry = current->vme_prev;
6023 if (prev_entry != vm_map_to_entry(map) &&
6024 !prev_entry->map_aligned &&
6025 (vm_map_round_page(prev_entry->vme_end,
6026 VM_MAP_PAGE_MASK(map))
6027 == end)) {
6028 /*
6029 * The last entry in our range is not "map-aligned"
6030 * but it would have reached all the way to "end"
6031 * if it had been map-aligned, so this is not really
6032 * a hole in the range and we can proceed.
6033 */
6034 prev = end;
6035 }
6036 }
6037 #endif /* __arm64__ */
6038
6039 if (end > prev) {
6040 vm_map_unlock(map);
6041 return KERN_INVALID_ADDRESS;
6042 }
6043
6044 /*
6045 * Go back and fix up protections.
6046 * Clip to start here if the range starts within
6047 * the entry.
6048 */
6049
6050 current = entry;
6051 if (current != vm_map_to_entry(map)) {
6052 /* clip and unnest if necessary */
6053 vm_map_clip_start(map, current, start);
6054 }
6055
6056 while ((current != vm_map_to_entry(map)) &&
6057 (current->vme_start < end)) {
6058 vm_prot_t old_prot;
6059
6060 vm_map_clip_end(map, current, end);
6061
6062 #if DEVELOPMENT || DEBUG
6063 if (current->csm_associated && vm_log_xnu_user_debug) {
6064 printf("FBDP %d[%s] %s(0x%llx,0x%llx,0x%x) on map %p entry %p [0x%llx:0x%llx 0x%x/0x%x] csm_associated\n",
6065 proc_selfpid(),
6066 (get_bsdtask_info(current_task())
6067 ? proc_name_address(get_bsdtask_info(current_task()))
6068 : "?"),
6069 __FUNCTION__,
6070 (uint64_t)start,
6071 (uint64_t)end,
6072 new_prot,
6073 map, current,
6074 current->vme_start,
6075 current->vme_end,
6076 current->protection,
6077 current->max_protection);
6078 }
6079 #endif /* DEVELOPMENT || DEBUG */
6080
6081 if (current->is_sub_map) {
6082 /* clipping did unnest if needed */
6083 assert(!current->use_pmap);
6084 }
6085
6086 old_prot = current->protection;
6087
6088 if (set_max) {
6089 current->max_protection = new_prot;
6090 /* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
6091 current->protection = (new_prot & old_prot);
6092 } else {
6093 current->protection = new_prot;
6094 }
6095
6096 #if CODE_SIGNING_MONITOR
6097 if (!current->vme_xnu_user_debug &&
6098 /* a !csm_associated mapping becoming executable */
6099 ((!current->csm_associated &&
6100 !(old_prot & VM_PROT_EXECUTE) &&
6101 (current->protection & VM_PROT_EXECUTE))
6102 ||
6103 /* a csm_associated mapping becoming writable */
6104 (current->csm_associated &&
6105 !(old_prot & VM_PROT_WRITE) &&
6106 (current->protection & VM_PROT_WRITE)))) {
6107 /*
6108 * This mapping has not already been marked as
6109 * "user_debug" and it is either:
6110 * 1. not code-signing-monitored and becoming executable
6111 * 2. code-signing-monitored and becoming writable,
6112 * so inform the CodeSigningMonitor and mark the
6113 * mapping as "user_debug" if appropriate.
6114 */
6115 vm_map_kernel_flags_t vmk_flags;
6116 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
6117 /* pretend it's a vm_protect(VM_PROT_COPY)... */
6118 vmk_flags.vmkf_remap_prot_copy = true;
6119 kr = vm_map_entry_cs_associate(map, current, vmk_flags);
6120 #if DEVELOPMENT || DEBUG
6121 if (vm_log_xnu_user_debug) {
6122 printf("FBDP %d[%s] %s:%d map %p entry %p [ 0x%llx 0x%llx ] prot 0x%x -> 0x%x cs_associate -> %d user_debug=%d\n",
6123 proc_selfpid(),
6124 (get_bsdtask_info(current_task()) ? proc_name_address(get_bsdtask_info(current_task())) : "?"),
6125 __FUNCTION__, __LINE__,
6126 map, current,
6127 current->vme_start, current->vme_end,
6128 old_prot, current->protection,
6129 kr, current->vme_xnu_user_debug);
6130 }
6131 #endif /* DEVELOPMENT || DEBUG */
6132 }
6133 #endif /* CODE_SIGNING_MONITOR */
6134
6135 /*
6136 * Update physical map if necessary.
6137 * If the request is to turn off write protection,
6138 * we won't do it for real (in pmap). This is because
6139 * it would cause copy-on-write to fail. We've already
6140 * set the new protection in the map, so if a
6141 * write-protect fault occurs, it will be fixed up
6142 * properly, COW or not.
6143 */
6144 if (current->protection != old_prot) {
6145 /* Look one level in: we support nested pmaps */
6146 /* from mapped submaps which are direct entries */
6147 /* in our map */
6148
6149 vm_prot_t prot;
6150
6151 prot = current->protection;
6152 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6153 prot &= ~VM_PROT_WRITE;
6154 } else {
6155 assert(!VME_OBJECT(current)->code_signed);
6156 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6157 if (prot & VM_PROT_WRITE) {
6158 /*
6159 * For write requests on the
6160 * compressor, we will ask the
6161 * pmap layer to prevent us from
6162 * taking a write fault when we
6163 * attempt to access the mapping
6164 * next.
6165 */
6166 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6167 }
6168 }
6169
6170 if (override_nx(map, VME_ALIAS(current)) && prot) {
6171 prot |= VM_PROT_EXECUTE;
6172 }
6173
6174 #if DEVELOPMENT || DEBUG
6175 if (!(old_prot & VM_PROT_EXECUTE) &&
6176 (prot & VM_PROT_EXECUTE) &&
6177 panic_on_unsigned_execute &&
6178 (proc_selfcsflags() & CS_KILL)) {
6179 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6180 }
6181 #endif /* DEVELOPMENT || DEBUG */
6182
6183 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6184 if (current->wired_count) {
6185 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
6186 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6187 }
6188
6189 /* If the pmap layer cares about this
6190 * protection type, force a fault for
6191 * each page so that vm_fault will
6192 * repopulate the page with the full
6193 * set of protections.
6194 */
6195 /*
6196 * TODO: We don't seem to need this,
6197 * but this is due to an internal
6198 * implementation detail of
6199 * pmap_protect. Do we want to rely
6200 * on this?
6201 */
6202 prot = VM_PROT_NONE;
6203 }
6204
6205 if (current->is_sub_map && current->use_pmap) {
6206 pmap_protect(VME_SUBMAP(current)->pmap,
6207 current->vme_start,
6208 current->vme_end,
6209 prot);
6210 } else {
6211 pmap_protect_options(map->pmap,
6212 current->vme_start,
6213 current->vme_end,
6214 prot,
6215 pmap_options,
6216 NULL);
6217 }
6218 }
6219 current = current->vme_next;
6220 }
6221
6222 current = entry;
6223 while ((current != vm_map_to_entry(map)) &&
6224 (current->vme_start <= end)) {
6225 vm_map_simplify_entry(map, current);
6226 current = current->vme_next;
6227 }
6228
6229 vm_map_unlock(map);
6230 return KERN_SUCCESS;
6231 }
6232
6233 /*
6234 * vm_map_inherit:
6235 *
6236 * Sets the inheritance of the specified address
6237 * range in the target map. Inheritance
6238 * affects how the map will be shared with
6239 * child maps at the time of vm_map_fork.
6240 */
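/*
 * For example, VM_INHERIT_SHARE leaves the range shared between parent
 * and child, VM_INHERIT_COPY gives the child a copy-on-write copy, and
 * VM_INHERIT_NONE leaves the range unmapped in the child.
 */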
6241 kern_return_t
6242 vm_map_inherit(
6243 vm_map_t map,
6244 vm_map_offset_t start,
6245 vm_map_offset_t end,
6246 vm_inherit_t new_inheritance)
6247 {
6248 vm_map_entry_t entry;
6249 vm_map_entry_t temp_entry;
6250
6251 vm_map_lock(map);
6252
6253 VM_MAP_RANGE_CHECK(map, start, end);
6254
6255 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
6256 return KERN_INVALID_ADDRESS;
6257 }
6258
6259 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6260 entry = temp_entry;
6261 } else {
6262 temp_entry = temp_entry->vme_next;
6263 entry = temp_entry;
6264 }
6265
6266 /* first check entire range for submaps which can't support the */
6267 /* given inheritance. */
6268 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6269 if (entry->is_sub_map) {
6270 if (new_inheritance == VM_INHERIT_COPY) {
6271 vm_map_unlock(map);
6272 return KERN_INVALID_ARGUMENT;
6273 }
6274 }
6275
6276 entry = entry->vme_next;
6277 }
6278
6279 entry = temp_entry;
6280 if (entry != vm_map_to_entry(map)) {
6281 /* clip and unnest if necessary */
6282 vm_map_clip_start(map, entry, start);
6283 }
6284
6285 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6286 vm_map_clip_end(map, entry, end);
6287 if (entry->is_sub_map) {
6288 /* clip did unnest if needed */
6289 assert(!entry->use_pmap);
6290 }
6291
6292 entry->inheritance = new_inheritance;
6293
6294 entry = entry->vme_next;
6295 }
6296
6297 vm_map_unlock(map);
6298 return KERN_SUCCESS;
6299 }
6300
6301 /*
6302 * Update the accounting for the amount of wired memory in this map. If the user has
6303 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6304 */
6305
6306 static kern_return_t
6307 add_wire_counts(
6308 vm_map_t map,
6309 vm_map_entry_t entry,
6310 boolean_t user_wire)
6311 {
6312 vm_map_size_t size;
6313
6314 if (user_wire) {
6315 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6316
6317 /*
6318 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6319 * this map entry.
6320 */
6321
6322 if (entry->user_wired_count == 0) {
6323 size = entry->vme_end - entry->vme_start;
6324
6325 /*
6326 * Since this is the first time the user is wiring this map entry, check to see if we're
6327 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
6328 * the process's rlimit and the global vm_per_task_user_wire_limit. There is also
6329 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6330 * limit, then we fail.
6331 */
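/*
 * For example, wiring a 1MB entry for the first time fails if either
 * map->user_wire_size + 1MB exceeds the per-map limit, or the
 * system-wide wired total (ptoa of the current wire counts) + 1MB
 * exceeds vm_global_user_wire_limit.
 */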
6332
6333 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6334 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6335 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6336 #if DEVELOPMENT || DEBUG
6337 if (panic_on_mlock_failure) {
6338 panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
6339 }
6340 #endif /* DEVELOPMENT || DEBUG */
6341 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6342 } else {
6343 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6344 #if DEVELOPMENT || DEBUG
6345 if (panic_on_mlock_failure) {
6346 panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
6347 }
6348 #endif /* DEVELOPMENT || DEBUG */
6349 }
6350 return KERN_RESOURCE_SHORTAGE;
6351 }
6352
6353 /*
6354 * The first time the user wires an entry, we also increment the wired_count and add this to
6355 * the total that has been wired in the map.
6356 */
6357
6358 if (entry->wired_count >= MAX_WIRE_COUNT) {
6359 return KERN_FAILURE;
6360 }
6361
6362 entry->wired_count++;
6363 map->user_wire_size += size;
6364 }
6365
6366 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6367 return KERN_FAILURE;
6368 }
6369
6370 entry->user_wired_count++;
6371 } else {
6372 /*
6373 * The kernel's wiring the memory. Just bump the count and continue.
6374 */
6375
6376 if (entry->wired_count >= MAX_WIRE_COUNT) {
6377 panic("vm_map_wire: too many wirings");
6378 }
6379
6380 entry->wired_count++;
6381 }
6382
6383 return KERN_SUCCESS;
6384 }
6385
6386 /*
6387 * Update the memory wiring accounting now that the given map entry is being unwired.
6388 */
6389
6390 static void
6391 subtract_wire_counts(
6392 vm_map_t map,
6393 vm_map_entry_t entry,
6394 boolean_t user_wire)
6395 {
6396 if (user_wire) {
6397 /*
6398 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6399 */
6400
6401 if (entry->user_wired_count == 1) {
6402 /*
6403 * We're removing the last user wire reference. Decrement the wired_count and the total
6404 * user wired memory for this map.
6405 */
6406
6407 assert(entry->wired_count >= 1);
6408 entry->wired_count--;
6409 map->user_wire_size -= entry->vme_end - entry->vme_start;
6410 }
6411
6412 assert(entry->user_wired_count >= 1);
6413 entry->user_wired_count--;
6414 } else {
6415 /*
6416 * The kernel is unwiring the memory. Just update the count.
6417 */
6418
6419 assert(entry->wired_count >= 1);
6420 entry->wired_count--;
6421 }
6422 }
6423
6424 int cs_executable_wire = 0;
6425
6426 /*
6427 * vm_map_wire:
6428 *
6429 * Sets the pageability of the specified address range in the
6430 * target map as wired. Regions specified as not pageable require
6431 * locked-down physical memory and physical page maps. The
6432 * access_type variable indicates types of accesses that must not
6433 * generate page faults. This is checked against protection of
6434 * memory being locked-down.
6435 *
6436 * The map must not be locked, but a reference must remain to the
6437 * map throughout the call.
6438 */
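/*
 * vm_map_wire_nested() does the real work: "map_pmap" and "pmap_addr"
 * are supplied when wiring is performed on behalf of a parent map that
 * nests this map as a submap, and "physpage_p", when non-NULL, requests
 * the "wire and extract" mode that wires exactly one page and returns
 * its physical page number.
 */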
6439 static kern_return_t
6440 vm_map_wire_nested(
6441 vm_map_t map,
6442 vm_map_offset_t start,
6443 vm_map_offset_t end,
6444 vm_prot_t caller_prot,
6445 vm_tag_t tag,
6446 boolean_t user_wire,
6447 pmap_t map_pmap,
6448 vm_map_offset_t pmap_addr,
6449 ppnum_t *physpage_p)
6450 {
6451 vm_map_entry_t entry;
6452 vm_prot_t access_type;
6453 struct vm_map_entry *first_entry, tmp_entry;
6454 vm_map_t real_map;
6455 vm_map_offset_t s, e;
6456 kern_return_t rc;
6457 boolean_t need_wakeup;
6458 boolean_t main_map = FALSE;
6459 wait_interrupt_t interruptible_state;
6460 thread_t cur_thread;
6461 unsigned int last_timestamp;
6462 vm_map_size_t size;
6463 boolean_t wire_and_extract;
6464 vm_prot_t extra_prots;
6465
6466 extra_prots = VM_PROT_COPY;
6467 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6468 #if XNU_TARGET_OS_OSX
6469 if (map->pmap == kernel_pmap ||
6470 !vm_map_cs_enforcement(map)) {
6471 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6472 }
6473 #endif /* XNU_TARGET_OS_OSX */
6474 #if CODE_SIGNING_MONITOR
6475 if (csm_address_space_exempt(map->pmap) == KERN_SUCCESS) {
6476 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6477 }
6478 #endif /* CODE_SIGNING_MONITOR */
6479
6480 access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));
6481
6482 wire_and_extract = FALSE;
6483 if (physpage_p != NULL) {
6484 /*
6485 * The caller wants the physical page number of the
6486 * wired page. We return only one physical page number
6487 * so this works for only one page at a time.
6488 */
6489 if ((end - start) != PAGE_SIZE) {
6490 return KERN_INVALID_ARGUMENT;
6491 }
6492 wire_and_extract = TRUE;
6493 *physpage_p = 0;
6494 }
6495
6496 vm_map_lock(map);
6497 if (map_pmap == NULL) {
6498 main_map = TRUE;
6499 }
6500 last_timestamp = map->timestamp;
6501
6502 VM_MAP_RANGE_CHECK(map, start, end);
6503 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6504 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6505
6506 if (start == end) {
6507 /* We wired what the caller asked for, zero pages */
6508 vm_map_unlock(map);
6509 return KERN_SUCCESS;
6510 }
6511
6512 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
6513 return KERN_INVALID_ADDRESS;
6514 }
6515
6516 need_wakeup = FALSE;
6517 cur_thread = current_thread();
6518
6519 s = start;
6520 rc = KERN_SUCCESS;
6521
6522 if (vm_map_lookup_entry(map, s, &first_entry)) {
6523 entry = first_entry;
6524 /*
6525 * vm_map_clip_start will be done later.
6526 * We don't want to unnest any nested submaps here !
6527 */
6528 } else {
6529 /* Start address is not in map */
6530 rc = KERN_INVALID_ADDRESS;
6531 goto done;
6532 }
6533
6534 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6535 /*
6536 * At this point, we have wired from "start" to "s".
6537 * We still need to wire from "s" to "end".
6538 *
6539 * "entry" hasn't been clipped, so it could start before "s"
6540 * and/or end after "end".
6541 */
6542
6543 /* "e" is how far we want to wire in this entry */
6544 e = entry->vme_end;
6545 if (e > end) {
6546 e = end;
6547 }
6548
6549 /*
6550 * If another thread is wiring/unwiring this entry then
6551 * block after informing other thread to wake us up.
6552 */
6553 if (entry->in_transition) {
6554 wait_result_t wait_result;
6555
6556 /*
6557 * We have not clipped the entry. Make sure that
6558 * the start address is in range so that the lookup
6559 * below will succeed.
6560 * "s" is the current starting point: we've already
6561 * wired from "start" to "s" and we still have
6562 * to wire from "s" to "end".
6563 */
6564
6565 entry->needs_wakeup = TRUE;
6566
6567 /*
6568 * wake up anybody waiting on entries that we have
6569 * already wired.
6570 */
6571 if (need_wakeup) {
6572 vm_map_entry_wakeup(map);
6573 need_wakeup = FALSE;
6574 }
6575 /*
6576 * User wiring is interruptible
6577 */
6578 wait_result = vm_map_entry_wait(map,
6579 (user_wire) ? THREAD_ABORTSAFE :
6580 THREAD_UNINT);
6581 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6582 /*
6583 * undo the wirings we have done so far
6584 * We do not clear the needs_wakeup flag,
6585 * because we cannot tell if we were the
6586 * only one waiting.
6587 */
6588 rc = KERN_FAILURE;
6589 goto done;
6590 }
6591
6592 /*
6593 * Cannot avoid a lookup here. Reset the timestamp.
6594 */
6595 last_timestamp = map->timestamp;
6596
6597 /*
6598 * The entry could have been clipped; look it up again.
6599 * The worst that can happen is that it no longer exists.
6600 */
6601 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6602 /*
6603 * User: undo everything up to the previous
6604 * entry. Let vm_map_unwire worry about
6605 * checking the validity of the range.
6606 */
6607 rc = KERN_FAILURE;
6608 goto done;
6609 }
6610 entry = first_entry;
6611 continue;
6612 }
6613
6614 if (entry->is_sub_map) {
6615 vm_map_offset_t sub_start;
6616 vm_map_offset_t sub_end;
6617 vm_map_offset_t local_start;
6618 vm_map_offset_t local_end;
6619 pmap_t pmap;
6620
6621 if (wire_and_extract) {
6622 /*
6623 * Wiring would result in copy-on-write
6624 * which would not be compatible with
6625 * the sharing we have with the original
6626 * provider of this memory.
6627 */
6628 rc = KERN_INVALID_ARGUMENT;
6629 goto done;
6630 }
6631
6632 vm_map_clip_start(map, entry, s);
6633 vm_map_clip_end(map, entry, end);
6634
6635 sub_start = VME_OFFSET(entry);
6636 sub_end = entry->vme_end;
6637 sub_end += VME_OFFSET(entry) - entry->vme_start;
6638
6639 local_end = entry->vme_end;
6640 if (map_pmap == NULL) {
6641 vm_object_t object;
6642 vm_object_offset_t offset;
6643 vm_prot_t prot;
6644 boolean_t wired;
6645 vm_map_entry_t local_entry;
6646 vm_map_version_t version;
6647 vm_map_t lookup_map;
6648
6649 if (entry->use_pmap) {
6650 pmap = VME_SUBMAP(entry)->pmap;
6651 /* ppc implementation requires that */
6652 /* submaps pmap address ranges line */
6653 /* up with parent map */
6654 #ifdef notdef
6655 pmap_addr = sub_start;
6656 #endif
6657 pmap_addr = s;
6658 } else {
6659 pmap = map->pmap;
6660 pmap_addr = s;
6661 }
6662
6663 if (entry->wired_count) {
6664 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6665 goto done;
6666 }
6667
6668 /*
6669 * The map was not unlocked:
6670 * no need to goto re-lookup.
6671 * Just go directly to next entry.
6672 */
6673 entry = entry->vme_next;
6674 s = entry->vme_start;
6675 continue;
6676 }
6677
6678 /* call vm_map_lookup_and_lock_object to */
6679 /* cause any needs copy to be */
6680 /* evaluated */
6681 local_start = entry->vme_start;
6682 lookup_map = map;
6683 vm_map_lock_write_to_read(map);
6684 rc = vm_map_lookup_and_lock_object(
6685 &lookup_map, local_start,
6686 (access_type | extra_prots),
6687 OBJECT_LOCK_EXCLUSIVE,
6688 &version, &object,
6689 &offset, &prot, &wired,
6690 NULL,
6691 &real_map, NULL);
6692 if (rc != KERN_SUCCESS) {
6693 vm_map_unlock_read(lookup_map);
6694 assert(map_pmap == NULL);
6695 vm_map_unwire(map, start,
6696 s, user_wire);
6697 return rc;
6698 }
6699 vm_object_unlock(object);
6700 if (real_map != lookup_map) {
6701 vm_map_unlock(real_map);
6702 }
6703 vm_map_unlock_read(lookup_map);
6704 vm_map_lock(map);
6705
6706 /* we unlocked, so must re-lookup */
6707 if (!vm_map_lookup_entry(map,
6708 local_start,
6709 &local_entry)) {
6710 rc = KERN_FAILURE;
6711 goto done;
6712 }
6713
6714 /*
6715 * entry could have been "simplified",
6716 * so re-clip
6717 */
6718 entry = local_entry;
6719 assert(s == local_start);
6720 vm_map_clip_start(map, entry, s);
6721 vm_map_clip_end(map, entry, end);
6722 /* re-compute "e" */
6723 e = entry->vme_end;
6724 if (e > end) {
6725 e = end;
6726 }
6727
6728 /* did we have a change of type? */
6729 if (!entry->is_sub_map) {
6730 last_timestamp = map->timestamp;
6731 continue;
6732 }
6733 } else {
6734 local_start = entry->vme_start;
6735 pmap = map_pmap;
6736 }
6737
6738 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6739 goto done;
6740 }
6741
6742 entry->in_transition = TRUE;
6743
6744 vm_map_unlock(map);
6745 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6746 sub_start, sub_end,
6747 caller_prot, tag,
6748 user_wire, pmap, pmap_addr,
6749 NULL);
6750 vm_map_lock(map);
6751
6752 /*
6753 * Find the entry again. It could have been clipped
6754 * after we unlocked the map.
6755 */
6756 if (!vm_map_lookup_entry(map, local_start,
6757 &first_entry)) {
6758 panic("vm_map_wire: re-lookup failed");
6759 }
6760 entry = first_entry;
6761
6762 assert(local_start == s);
6763 /* re-compute "e" */
6764 e = entry->vme_end;
6765 if (e > end) {
6766 e = end;
6767 }
6768
6769 last_timestamp = map->timestamp;
6770 while ((entry != vm_map_to_entry(map)) &&
6771 (entry->vme_start < e)) {
6772 assert(entry->in_transition);
6773 entry->in_transition = FALSE;
6774 if (entry->needs_wakeup) {
6775 entry->needs_wakeup = FALSE;
6776 need_wakeup = TRUE;
6777 }
6778 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6779 subtract_wire_counts(map, entry, user_wire);
6780 }
6781 entry = entry->vme_next;
6782 }
6783 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6784 goto done;
6785 }
6786
6787 /* no need to relookup again */
6788 s = entry->vme_start;
6789 continue;
6790 }
6791
6792 /*
6793 * If this entry is already wired then increment
6794 * the appropriate wire reference count.
6795 */
6796 if (entry->wired_count) {
6797 if ((entry->protection & access_type) != access_type) {
6798 /* found a protection problem */
6799
6800 /*
6801 * XXX FBDP
6802 * We should always return an error
6803 * in this case but since we didn't
6804 * enforce it before, let's do
6805 * it only for the new "wire_and_extract"
6806 * code path for now...
6807 */
6808 if (wire_and_extract) {
6809 rc = KERN_PROTECTION_FAILURE;
6810 goto done;
6811 }
6812 }
6813
6814 /*
6815 * entry is already wired down, get our reference
6816 * after clipping to our range.
6817 */
6818 vm_map_clip_start(map, entry, s);
6819 vm_map_clip_end(map, entry, end);
6820
6821 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6822 goto done;
6823 }
6824
6825 if (wire_and_extract) {
6826 vm_object_t object;
6827 vm_object_offset_t offset;
6828 vm_page_t m;
6829
6830 /*
6831 * We don't have to "wire" the page again
6832 * but we still have to "extract" its
6833 * physical page number, after some sanity
6834 * checks.
6835 */
6836 assert((entry->vme_end - entry->vme_start)
6837 == PAGE_SIZE);
6838 assert(!entry->needs_copy);
6839 assert(!entry->is_sub_map);
6840 assert(VME_OBJECT(entry));
6841 if (((entry->vme_end - entry->vme_start)
6842 != PAGE_SIZE) ||
6843 entry->needs_copy ||
6844 entry->is_sub_map ||
6845 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6846 rc = KERN_INVALID_ARGUMENT;
6847 goto done;
6848 }
6849
6850 object = VME_OBJECT(entry);
6851 offset = VME_OFFSET(entry);
6852 /* need exclusive lock to update m->dirty */
6853 if (entry->protection & VM_PROT_WRITE) {
6854 vm_object_lock(object);
6855 } else {
6856 vm_object_lock_shared(object);
6857 }
6858 m = vm_page_lookup(object, offset);
6859 assert(m != VM_PAGE_NULL);
6860 assert(VM_PAGE_WIRED(m));
6861 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6862 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6863 if (entry->protection & VM_PROT_WRITE) {
6864 vm_object_lock_assert_exclusive(
6865 object);
6866 m->vmp_dirty = TRUE;
6867 }
6868 } else {
6869 /* not already wired !? */
6870 *physpage_p = 0;
6871 }
6872 vm_object_unlock(object);
6873 }
6874
6875 /* map was not unlocked: no need to relookup */
6876 entry = entry->vme_next;
6877 s = entry->vme_start;
6878 continue;
6879 }
6880
6881 /*
6882 * Unwired entry or wire request transmitted via submap
6883 */
6884
6885 /*
6886 * Wiring would copy the pages to the shadow object.
6887 * The shadow object would not be code-signed so
6888 * attempting to execute code from these copied pages
6889 * would trigger a code-signing violation.
6890 */
6891
6892 if ((entry->protection & VM_PROT_EXECUTE)
6893 #if XNU_TARGET_OS_OSX
6894 &&
6895 map->pmap != kernel_pmap &&
6896 (vm_map_cs_enforcement(map)
6897 #if __arm64__
6898 || !VM_MAP_IS_EXOTIC(map)
6899 #endif /* __arm64__ */
6900 )
6901 #endif /* XNU_TARGET_OS_OSX */
6902 #if CODE_SIGNING_MONITOR
6903 &&
6904 (csm_address_space_exempt(map->pmap) != KERN_SUCCESS)
6905 #endif
6906 ) {
6907 #if MACH_ASSERT
6908 printf("pid %d[%s] wiring executable range from "
6909 "0x%llx to 0x%llx: rejected to preserve "
6910 "code-signing\n",
6911 proc_selfpid(),
6912 (get_bsdtask_info(current_task())
6913 ? proc_name_address(get_bsdtask_info(current_task()))
6914 : "?"),
6915 (uint64_t) entry->vme_start,
6916 (uint64_t) entry->vme_end);
6917 #endif /* MACH_ASSERT */
6918 DTRACE_VM2(cs_executable_wire,
6919 uint64_t, (uint64_t)entry->vme_start,
6920 uint64_t, (uint64_t)entry->vme_end);
6921 cs_executable_wire++;
6922 rc = KERN_PROTECTION_FAILURE;
6923 goto done;
6924 }
6925
6926 /*
6927 * Perform actions of vm_map_lookup that need the write
6928 * lock on the map: create a shadow object for a
6929 * copy-on-write region, or an object for a zero-fill
6930 * region.
6931 */
6932 size = entry->vme_end - entry->vme_start;
6933 /*
6934 * If wiring a copy-on-write page, we need to copy it now
6935 * even if we're only (currently) requesting read access.
6936 * This is aggressive, but once it's wired we can't move it.
6937 */
6938 if (entry->needs_copy) {
6939 if (wire_and_extract) {
6940 /*
6941 * We're supposed to share with the original
6942 * provider so should not be "needs_copy"
6943 */
6944 rc = KERN_INVALID_ARGUMENT;
6945 goto done;
6946 }
6947
6948 VME_OBJECT_SHADOW(entry, size,
6949 vm_map_always_shadow(map));
6950 entry->needs_copy = FALSE;
6951 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6952 if (wire_and_extract) {
6953 /*
6954 * We're supposed to share with the original
6955 * provider so should already have an object.
6956 */
6957 rc = KERN_INVALID_ARGUMENT;
6958 goto done;
6959 }
6960 VME_OBJECT_SET(entry, vm_object_allocate(size), false, 0);
6961 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6962 assert(entry->use_pmap);
6963 } else if (VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
6964 if (wire_and_extract) {
6965 /*
6966 * We're supposed to share with the original
6967 * provider so should not be COPY_SYMMETRIC.
6968 */
6969 rc = KERN_INVALID_ARGUMENT;
6970 goto done;
6971 }
6972 /*
6973 * Force an unrequested "copy-on-write" but only for
6974 * the range we're wiring.
6975 */
6976 // printf("FBDP %s:%d map %p entry %p [ 0x%llx 0x%llx ] s 0x%llx end 0x%llx wire&extract=%d\n", __FUNCTION__, __LINE__, map, entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, (uint64_t)s, (uint64_t)end, wire_and_extract);
6977 vm_map_clip_start(map, entry, s);
6978 vm_map_clip_end(map, entry, end);
6979 /* recompute "size" */
6980 size = entry->vme_end - entry->vme_start;
6981 /* make a shadow object */
6982 vm_object_t orig_object;
6983 vm_object_offset_t orig_offset;
6984 orig_object = VME_OBJECT(entry);
6985 orig_offset = VME_OFFSET(entry);
6986 VME_OBJECT_SHADOW(entry, size, vm_map_always_shadow(map));
6987 if (VME_OBJECT(entry) != orig_object) {
6988 /*
6989 * This mapping has not been shared (or it would be
6990 * COPY_DELAY instead of COPY_SYMMETRIC) and it has
6991 * not been copied-on-write (or it would be marked
6992 * as "needs_copy" and would have been handled above
6993 * and also already write-protected).
6994 * We still need to write-protect here to prevent
6995 * other threads from modifying these pages while
6996 * we're in the process of copying and wiring
6997 * the copied pages.
6998 * Since the mapping is neither shared nor COWed,
6999 * we only need to write-protect the PTEs for this
7000 * mapping.
7001 */
7002 vm_object_pmap_protect(orig_object,
7003 orig_offset,
7004 size,
7005 map->pmap,
7006 VM_MAP_PAGE_SIZE(map),
7007 entry->vme_start,
7008 entry->protection & ~VM_PROT_WRITE);
7009 }
7010 }
7011 if (VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7012 /*
7013 * Make the object COPY_DELAY to get a stable object
7014 * to wire.
7015 * That should avoid creating long shadow chains while
7016 * wiring/unwiring the same range repeatedly.
7017 * That also prevents part of the object from being
7018 * wired while another part is "needs_copy", which
7019 * could result in conflicting rules wrt copy-on-write.
7020 */
7021 vm_object_t object;
7022
7023 object = VME_OBJECT(entry);
7024 vm_object_lock(object);
7025 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7026 assertf(vm_object_round_page(VME_OFFSET(entry) + size) - vm_object_trunc_page(VME_OFFSET(entry)) == object->vo_size,
7027 "object %p size 0x%llx entry %p [0x%llx:0x%llx:0x%llx] size 0x%llx\n",
7028 object, (uint64_t)object->vo_size,
7029 entry,
7030 (uint64_t)entry->vme_start,
7031 (uint64_t)entry->vme_end,
7032 (uint64_t)VME_OFFSET(entry),
7033 (uint64_t)size);
7034 assertf(object->ref_count == 1,
7035 "object %p ref_count %d\n",
7036 object, object->ref_count);
7037 assertf(!entry->needs_copy,
7038 "entry %p\n", entry);
7039 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7040 object->true_share = TRUE;
7041 }
7042 vm_object_unlock(object);
7043 }
7044
7045 vm_map_clip_start(map, entry, s);
7046 vm_map_clip_end(map, entry, end);
7047
7048 /* re-compute "e" */
7049 e = entry->vme_end;
7050 if (e > end) {
7051 e = end;
7052 }
7053
7054 /*
7055 * Check for holes and protection mismatch.
7056 * Holes: Next entry should be contiguous unless this
7057 * is the end of the region.
7058 * Protection: Access requested must be allowed, unless
7059 * wiring is by protection class
7060 */
7061 if ((entry->vme_end < end) &&
7062 ((entry->vme_next == vm_map_to_entry(map)) ||
7063 (entry->vme_next->vme_start > entry->vme_end))) {
7064 /* found a hole */
7065 rc = KERN_INVALID_ADDRESS;
7066 goto done;
7067 }
7068 if ((entry->protection & access_type) != access_type) {
7069 /* found a protection problem */
7070 rc = KERN_PROTECTION_FAILURE;
7071 goto done;
7072 }
7073
7074 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
7075
7076 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
7077 goto done;
7078 }
7079
7080 entry->in_transition = TRUE;
7081
7082 /*
7083 * This entry might get split once we unlock the map.
7084 * In vm_fault_wire(), we need the current range as
7085 * defined by this entry. In order for this to work
7086 * along with a simultaneous clip operation, we make a
7087 * temporary copy of this entry and use that for the
7088 * wiring. Note that the underlying objects do not
7089 * change during a clip.
7090 */
7091 tmp_entry = *entry;
7092
7093 /*
7094 * The in_transition state guarantees that the entry
7095 * (or entries for this range, if a split occurred) will be
7096 * there when the map lock is acquired for the second time.
7097 */
7098 vm_map_unlock(map);
7099
7100 if (!user_wire && cur_thread != THREAD_NULL) {
7101 interruptible_state = thread_interrupt_level(THREAD_UNINT);
7102 } else {
7103 interruptible_state = THREAD_UNINT;
7104 }
7105
7106 if (map_pmap) {
7107 rc = vm_fault_wire(map,
7108 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
7109 physpage_p);
7110 } else {
7111 rc = vm_fault_wire(map,
7112 &tmp_entry, caller_prot, tag, map->pmap,
7113 tmp_entry.vme_start,
7114 physpage_p);
7115 }
7116
7117 if (!user_wire && cur_thread != THREAD_NULL) {
7118 thread_interrupt_level(interruptible_state);
7119 }
7120
7121 vm_map_lock(map);
7122
7123 if (last_timestamp + 1 != map->timestamp) {
7124 /*
7125 * Find the entry again. It could have been clipped
7126 * after we unlocked the map.
7127 */
7128 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7129 &first_entry)) {
7130 panic("vm_map_wire: re-lookup failed");
7131 }
7132
7133 entry = first_entry;
7134 }
7135
7136 last_timestamp = map->timestamp;
7137
7138 while ((entry != vm_map_to_entry(map)) &&
7139 (entry->vme_start < tmp_entry.vme_end)) {
7140 assert(entry->in_transition);
7141 entry->in_transition = FALSE;
7142 if (entry->needs_wakeup) {
7143 entry->needs_wakeup = FALSE;
7144 need_wakeup = TRUE;
7145 }
7146 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7147 subtract_wire_counts(map, entry, user_wire);
7148 }
7149 entry = entry->vme_next;
7150 }
7151
7152 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7153 goto done;
7154 }
7155
7156 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7157 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
7158 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7159 /* found a "new" hole */
7160 s = tmp_entry.vme_end;
7161 rc = KERN_INVALID_ADDRESS;
7162 goto done;
7163 }
7164
7165 s = entry->vme_start;
7166 } /* end while loop through map entries */
7167
7168 done:
7169 if (rc == KERN_SUCCESS) {
7170 /* repair any damage we may have made to the VM map */
7171 vm_map_simplify_range(map, start, end);
7172 }
7173
7174 vm_map_unlock(map);
7175
7176 /*
7177 * wake up anybody waiting on entries we wired.
7178 */
7179 if (need_wakeup) {
7180 vm_map_entry_wakeup(map);
7181 }
7182
7183 if (rc != KERN_SUCCESS) {
7184 /* undo what has been wired so far */
7185 vm_map_unwire_nested(map, start, s, user_wire,
7186 map_pmap, pmap_addr);
7187 if (physpage_p) {
7188 *physpage_p = 0;
7189 }
7190 }
7191
7192 return rc;
7193 }
7194
7195 kern_return_t
7196 vm_map_wire_external(
7197 vm_map_t map,
7198 vm_map_offset_t start,
7199 vm_map_offset_t end,
7200 vm_prot_t caller_prot,
7201 boolean_t user_wire)
7202 {
7203 kern_return_t kret;
7204
7205 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7206 user_wire, (pmap_t)NULL, 0, NULL);
7207 return kret;
7208 }
7209
7210 kern_return_t
7211 vm_map_wire_kernel(
7212 vm_map_t map,
7213 vm_map_offset_t start,
7214 vm_map_offset_t end,
7215 vm_prot_t caller_prot,
7216 vm_tag_t tag,
7217 boolean_t user_wire)
7218 {
7219 kern_return_t kret;
7220
7221 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7222 user_wire, (pmap_t)NULL, 0, NULL);
7223 return kret;
7224 }
7225
7226 kern_return_t
7227 vm_map_wire_and_extract_external(
7228 vm_map_t map,
7229 vm_map_offset_t start,
7230 vm_prot_t caller_prot,
7231 boolean_t user_wire,
7232 ppnum_t *physpage_p)
7233 {
7234 kern_return_t kret;
7235
7236 kret = vm_map_wire_nested(map,
7237 start,
7238 start + VM_MAP_PAGE_SIZE(map),
7239 caller_prot,
7240 vm_tag_bt(),
7241 user_wire,
7242 (pmap_t)NULL,
7243 0,
7244 physpage_p);
7245 if (kret != KERN_SUCCESS &&
7246 physpage_p != NULL) {
7247 *physpage_p = 0;
7248 }
7249 return kret;
7250 }
7251
7252 /*
7253 * vm_map_unwire:
7254 *
7255 * Sets the pageability of the specified address range in the target
7256 * as pageable. Regions specified must have been wired previously.
7257 *
7258 * The map must not be locked, but a reference must remain to the map
7259 * throughout the call.
7260 *
7261 * Kernel will panic on failures. User unwire ignores holes and
7262 * unwired and intransition entries to avoid losing memory by leaving
7263 * it unwired.
7264 */
7265 static kern_return_t
7266 vm_map_unwire_nested(
7267 vm_map_t map,
7268 vm_map_offset_t start,
7269 vm_map_offset_t end,
7270 boolean_t user_wire,
7271 pmap_t map_pmap,
7272 vm_map_offset_t pmap_addr)
7273 {
7274 vm_map_entry_t entry;
7275 struct vm_map_entry *first_entry, tmp_entry;
7276 boolean_t need_wakeup;
7277 boolean_t main_map = FALSE;
7278 unsigned int last_timestamp;
7279
7280 vm_map_lock(map);
7281 if (map_pmap == NULL) {
7282 main_map = TRUE;
7283 }
7284 last_timestamp = map->timestamp;
7285
7286 VM_MAP_RANGE_CHECK(map, start, end);
7287 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7288 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7289
7290 if (start == end) {
7291 /* We unwired what the caller asked for: zero pages */
7292 vm_map_unlock(map);
7293 return KERN_SUCCESS;
7294 }
7295
7296 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
7297 return KERN_INVALID_ADDRESS;
7298 }
7299
7300 if (vm_map_lookup_entry(map, start, &first_entry)) {
7301 entry = first_entry;
7302 /*
7303 * vm_map_clip_start will be done later.
7304 * We don't want to unnest any nested sub maps here !
7305 */
7306 } else {
7307 if (!user_wire) {
7308 panic("vm_map_unwire: start not found");
7309 }
7310 /* Start address is not in map. */
7311 vm_map_unlock(map);
7312 return KERN_INVALID_ADDRESS;
7313 }
7314
7315 if (entry->superpage_size) {
7316 /* superpages are always wired */
7317 vm_map_unlock(map);
7318 return KERN_INVALID_ADDRESS;
7319 }
7320
7321 need_wakeup = FALSE;
7322 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7323 if (entry->in_transition) {
7324 /*
7325 * 1)
7326 * Another thread is wiring down this entry. Note
7327 * that if it is not for the other thread we would
7328 * be unwiring an unwired entry. This is not
7329 * permitted. If we wait, we will be unwiring memory
7330 * we did not wire.
7331 *
7332 * 2)
7333 * Another thread is unwiring this entry. We did not
7334 * have a reference to it, because if we did, this
7335 * entry will not be getting unwired now.
7336 */
7337 if (!user_wire) {
7338 /*
7339 * XXX FBDP
7340 * This could happen: there could be some
7341 * overlapping vslock/vsunlock operations
7342 * going on.
7343 * We should probably just wait and retry,
7344 * but then we have to be careful that this
7345 * entry could get "simplified" after
7346 * "in_transition" gets unset and before
7347 * we re-lookup the entry, so we would
7348 * have to re-clip the entry to avoid
7349 * re-unwiring what we have already unwired...
7350 * See vm_map_wire_nested().
7351 *
7352 * Or we could just ignore "in_transition"
7353 * here and proceed to decrement the wired
7354 * count(s) on this entry. That should be fine
7355 * as long as "wired_count" doesn't drop all
7356 * the way to 0 (and we should panic if THAT
7357 * happens).
7358 */
7359 panic("vm_map_unwire: in_transition entry");
7360 }
7361
7362 entry = entry->vme_next;
7363 continue;
7364 }
7365
7366 if (entry->is_sub_map) {
7367 vm_map_offset_t sub_start;
7368 vm_map_offset_t sub_end;
7369 vm_map_offset_t local_end;
7370 pmap_t pmap;
7371
7372 vm_map_clip_start(map, entry, start);
7373 vm_map_clip_end(map, entry, end);
7374
7375 sub_start = VME_OFFSET(entry);
7376 sub_end = entry->vme_end - entry->vme_start;
7377 sub_end += VME_OFFSET(entry);
7378 local_end = entry->vme_end;
7379 if (map_pmap == NULL) {
7380 if (entry->use_pmap) {
7381 pmap = VME_SUBMAP(entry)->pmap;
7382 pmap_addr = sub_start;
7383 } else {
7384 pmap = map->pmap;
7385 pmap_addr = start;
7386 }
7387 if (entry->wired_count == 0 ||
7388 (user_wire && entry->user_wired_count == 0)) {
7389 if (!user_wire) {
7390 panic("vm_map_unwire: entry is unwired");
7391 }
7392 entry = entry->vme_next;
7393 continue;
7394 }
7395
7396 /*
7397 * Check for holes
7398 * Holes: Next entry should be contiguous unless
7399 * this is the end of the region.
7400 */
7401 if (((entry->vme_end < end) &&
7402 ((entry->vme_next == vm_map_to_entry(map)) ||
7403 (entry->vme_next->vme_start
7404 > entry->vme_end)))) {
7405 if (!user_wire) {
7406 panic("vm_map_unwire: non-contiguous region");
7407 }
7408 /*
7409 * entry = entry->vme_next;
7410 * continue;
7411 */
7412 }
7413
7414 subtract_wire_counts(map, entry, user_wire);
7415
7416 if (entry->wired_count != 0) {
7417 entry = entry->vme_next;
7418 continue;
7419 }
7420
7421 entry->in_transition = TRUE;
7422 tmp_entry = *entry;/* see comment in vm_map_wire() */
7423
7424 /*
7425 * We can unlock the map now. The in_transition state
7426 * guarantees existence of the entry.
7427 */
7428 vm_map_unlock(map);
7429 vm_map_unwire_nested(VME_SUBMAP(entry),
7430 sub_start, sub_end, user_wire, pmap, pmap_addr);
7431 vm_map_lock(map);
7432
7433 if (last_timestamp + 1 != map->timestamp) {
7434 /*
7435 * Find the entry again. It could have been
7436 * clipped or deleted after we unlocked the map.
7437 */
7438 if (!vm_map_lookup_entry(map,
7439 tmp_entry.vme_start,
7440 &first_entry)) {
7441 if (!user_wire) {
7442 panic("vm_map_unwire: re-lookup failed");
7443 }
7444 entry = first_entry->vme_next;
7445 } else {
7446 entry = first_entry;
7447 }
7448 }
7449 last_timestamp = map->timestamp;
7450
7451 /*
7452 * clear transition bit for all constituent entries
7453 * that were in the original entry (saved in
7454 * tmp_entry). Also check for waiters.
7455 */
7456 while ((entry != vm_map_to_entry(map)) &&
7457 (entry->vme_start < tmp_entry.vme_end)) {
7458 assert(entry->in_transition);
7459 entry->in_transition = FALSE;
7460 if (entry->needs_wakeup) {
7461 entry->needs_wakeup = FALSE;
7462 need_wakeup = TRUE;
7463 }
7464 entry = entry->vme_next;
7465 }
7466 continue;
7467 } else {
7468 tmp_entry = *entry;
7469 vm_map_unlock(map);
7470 vm_map_unwire_nested(VME_SUBMAP(entry),
7471 sub_start, sub_end, user_wire, map_pmap,
7472 pmap_addr);
7473 vm_map_lock(map);
7474
7475 if (last_timestamp + 1 != map->timestamp) {
7476 /*
7477 * Find the entry again. It could have been
7478 * clipped or deleted after we unlocked the map.
7479 */
7480 if (!vm_map_lookup_entry(map,
7481 tmp_entry.vme_start,
7482 &first_entry)) {
7483 if (!user_wire) {
7484 panic("vm_map_unwire: re-lookup failed");
7485 }
7486 entry = first_entry->vme_next;
7487 } else {
7488 entry = first_entry;
7489 }
7490 }
7491 last_timestamp = map->timestamp;
7492 }
7493 }
7494
7495
7496 if ((entry->wired_count == 0) ||
7497 (user_wire && entry->user_wired_count == 0)) {
7498 if (!user_wire) {
7499 panic("vm_map_unwire: entry is unwired");
7500 }
7501
7502 entry = entry->vme_next;
7503 continue;
7504 }
7505
7506 assert(entry->wired_count > 0 &&
7507 (!user_wire || entry->user_wired_count > 0));
7508
7509 vm_map_clip_start(map, entry, start);
7510 vm_map_clip_end(map, entry, end);
7511
7512 /*
7513 * Check for holes
7514 * Holes: Next entry should be contiguous unless
7515 * this is the end of the region.
7516 */
7517 if (((entry->vme_end < end) &&
7518 ((entry->vme_next == vm_map_to_entry(map)) ||
7519 (entry->vme_next->vme_start > entry->vme_end)))) {
7520 if (!user_wire) {
7521 panic("vm_map_unwire: non-contiguous region");
7522 }
7523 entry = entry->vme_next;
7524 continue;
7525 }
7526
7527 subtract_wire_counts(map, entry, user_wire);
7528
7529 if (entry->wired_count != 0) {
7530 entry = entry->vme_next;
7531 continue;
7532 }
7533
7534 if (entry->zero_wired_pages) {
7535 entry->zero_wired_pages = FALSE;
7536 }
7537
7538 entry->in_transition = TRUE;
7539 tmp_entry = *entry; /* see comment in vm_map_wire() */
7540
7541 /*
7542 * We can unlock the map now. The in_transition state
7543 * guarantees existence of the entry.
7544 */
7545 vm_map_unlock(map);
7546 if (map_pmap) {
7547 vm_fault_unwire(map, &tmp_entry, FALSE, map_pmap,
7548 pmap_addr, tmp_entry.vme_end);
7549 } else {
7550 vm_fault_unwire(map, &tmp_entry, FALSE, map->pmap,
7551 tmp_entry.vme_start, tmp_entry.vme_end);
7552 }
7553 vm_map_lock(map);
7554
7555 if (last_timestamp + 1 != map->timestamp) {
7556 /*
7557 * Find the entry again. It could have been clipped
7558 * or deleted after we unlocked the map.
7559 */
7560 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7561 &first_entry)) {
7562 if (!user_wire) {
7563 panic("vm_map_unwire: re-lookup failed");
7564 }
7565 entry = first_entry->vme_next;
7566 } else {
7567 entry = first_entry;
7568 }
7569 }
7570 last_timestamp = map->timestamp;
7571
7572 /*
7573 * clear transition bit for all constituent entries that
7574 * were in the original entry (saved in tmp_entry). Also
7575 * check for waiters.
7576 */
7577 while ((entry != vm_map_to_entry(map)) &&
7578 (entry->vme_start < tmp_entry.vme_end)) {
7579 assert(entry->in_transition);
7580 entry->in_transition = FALSE;
7581 if (entry->needs_wakeup) {
7582 entry->needs_wakeup = FALSE;
7583 need_wakeup = TRUE;
7584 }
7585 entry = entry->vme_next;
7586 }
7587 }
7588
7589 /*
7590 * We might have fragmented the address space when we wired this
7591 * range of addresses. Attempt to re-coalesce these VM map entries
7592 * with their neighbors now that they're no longer wired.
7593 * Under some circumstances, address space fragmentation can
7594 * prevent VM object shadow chain collapsing, which can cause
7595 * swap space leaks.
7596 */
7597 vm_map_simplify_range(map, start, end);
7598
7599 vm_map_unlock(map);
7600 /*
7601 * wake up anybody waiting on entries that we have unwired.
7602 */
7603 if (need_wakeup) {
7604 vm_map_entry_wakeup(map);
7605 }
7606 return KERN_SUCCESS;
7607 }
7608
7609 kern_return_t
7610 vm_map_unwire(
7611 vm_map_t map,
7612 vm_map_offset_t start,
7613 vm_map_offset_t end,
7614 boolean_t user_wire)
7615 {
7616 return vm_map_unwire_nested(map, start, end,
7617 user_wire, (pmap_t)NULL, 0);
7618 }
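
/*
 * Illustrative usage sketch (not part of the original source): a
 * caller that user-wired [start, end) earlier would undo it through
 * the wrapper above:
 *
 *	kr = vm_map_unwire(map, start, end, TRUE);	-- user wiring
 *
 * With user_wire == FALSE (kernel wirings), an already-unwired entry
 * or a gap in the range panics instead of being silently skipped, as
 * the checks in vm_map_unwire_nested() above show.
 */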
7619
7620
7621 /*
7622 * vm_map_entry_zap: [ internal use only ]
7623 *
7624 * Remove the entry from the target map
7625 * and put it on a zap list.
7626 */
7627 static void
7628 vm_map_entry_zap(
7629 vm_map_t map,
7630 vm_map_entry_t entry,
7631 vm_map_zap_t zap)
7632 {
7633 vm_map_offset_t s, e;
7634
7635 s = entry->vme_start;
7636 e = entry->vme_end;
7637 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7638 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7639 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7640 assert(page_aligned(s));
7641 assert(page_aligned(e));
7642 }
7643 if (entry->map_aligned == TRUE) {
7644 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7645 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7646 }
7647 assert(entry->wired_count == 0);
7648 assert(entry->user_wired_count == 0);
7649 assert(!entry->vme_permanent);
7650
7651 vm_map_store_entry_unlink(map, entry, false);
7652 map->size -= e - s;
7653
7654 vm_map_zap_append(zap, entry);
7655 }
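
/*
 * Illustrative sketch (hedged, not original code): zapped entries are
 * accumulated on a caller-provided zap list and reclaimed only after
 * the map lock is dropped, mirroring vm_map_remove_and_unlock() below:
 *
 *	VM_MAP_ZAP_DECLARE(zap);
 *
 *	vm_map_lock(map);
 *	...
 *	vm_map_entry_zap(map, entry, &zap);
 *	...
 *	vm_map_unlock(map);
 *	vm_map_zap_dispose(&zap);
 */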
7656
7657 static void
7658 vm_map_submap_pmap_clean(
7659 vm_map_t map,
7660 vm_map_offset_t start,
7661 vm_map_offset_t end,
7662 vm_map_t sub_map,
7663 vm_map_offset_t offset)
7664 {
7665 vm_map_offset_t submap_start;
7666 vm_map_offset_t submap_end;
7667 vm_map_size_t remove_size;
7668 vm_map_entry_t entry;
7669
7670 submap_end = offset + (end - start);
7671 submap_start = offset;
7672
7673 vm_map_lock_read(sub_map);
7674 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7675 remove_size = (entry->vme_end - entry->vme_start);
7676 if (offset > entry->vme_start) {
7677 remove_size -= offset - entry->vme_start;
7678 }
7679
7680
7681 if (submap_end < entry->vme_end) {
7682 remove_size -=
7683 entry->vme_end - submap_end;
7684 }
7685 if (entry->is_sub_map) {
7686 vm_map_submap_pmap_clean(
7687 sub_map,
7688 start,
7689 start + remove_size,
7690 VME_SUBMAP(entry),
7691 VME_OFFSET(entry));
7692 } else {
7693 if (map->mapped_in_other_pmaps &&
7694 os_ref_get_count_raw(&map->map_refcnt) != 0 &&
7695 VME_OBJECT(entry) != NULL) {
7696 vm_object_pmap_protect_options(
7697 VME_OBJECT(entry),
7698 (VME_OFFSET(entry) +
7699 offset -
7700 entry->vme_start),
7701 remove_size,
7702 PMAP_NULL,
7703 PAGE_SIZE,
7704 entry->vme_start,
7705 VM_PROT_NONE,
7706 PMAP_OPTIONS_REMOVE);
7707 } else {
7708 pmap_remove(map->pmap,
7709 (addr64_t)start,
7710 (addr64_t)(start + remove_size));
7711 }
7712 }
7713 }
7714
7715 entry = entry->vme_next;
7716
7717 while ((entry != vm_map_to_entry(sub_map))
7718 && (entry->vme_start < submap_end)) {
7719 remove_size = (entry->vme_end - entry->vme_start);
7720 if (submap_end < entry->vme_end) {
7721 remove_size -= entry->vme_end - submap_end;
7722 }
7723 if (entry->is_sub_map) {
7724 vm_map_submap_pmap_clean(
7725 sub_map,
7726 (start + entry->vme_start) - offset,
7727 ((start + entry->vme_start) - offset) + remove_size,
7728 VME_SUBMAP(entry),
7729 VME_OFFSET(entry));
7730 } else {
7731 if (map->mapped_in_other_pmaps &&
7732 os_ref_get_count_raw(&map->map_refcnt) != 0 &&
7733 VME_OBJECT(entry) != NULL) {
7734 vm_object_pmap_protect_options(
7735 VME_OBJECT(entry),
7736 VME_OFFSET(entry),
7737 remove_size,
7738 PMAP_NULL,
7739 PAGE_SIZE,
7740 entry->vme_start,
7741 VM_PROT_NONE,
7742 PMAP_OPTIONS_REMOVE);
7743 } else {
7744 pmap_remove(map->pmap,
7745 (addr64_t)((start + entry->vme_start)
7746 - offset),
7747 (addr64_t)(((start + entry->vme_start)
7748 - offset) + remove_size));
7749 }
7750 }
7751 entry = entry->vme_next;
7752 }
7753 vm_map_unlock_read(sub_map);
7754 return;
7755 }
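
/*
 * Illustrative sketch: vm_map_delete() below invokes this cleanup for
 * a nested submap entry once it is ready to be removed, e.g.
 *
 *	vm_map_submap_pmap_clean(map, entry->vme_start, entry->vme_end,
 *	    VME_SUBMAP(entry), VME_OFFSET(entry));
 *
 * so that any physical mappings established through the parent map(s)
 * are torn down before the entry itself is zapped.
 */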
7756
7757 /*
7758 * virt_memory_guard_ast:
7759 *
7760 * Handle the AST callout for a virtual memory guard.
7761 * raise an EXC_GUARD exception and terminate the task
7762 * if configured to do so.
7763 */
7764 void
7765 virt_memory_guard_ast(
7766 thread_t thread,
7767 mach_exception_data_type_t code,
7768 mach_exception_data_type_t subcode)
7769 {
7770 task_t task = get_threadtask(thread);
7771 assert(task != kernel_task);
7772 assert(task == current_task());
7773 kern_return_t sync_exception_result;
7774 uint32_t behavior;
7775
7776 behavior = task->task_exc_guard;
7777
7778 /* Is delivery enabled */
7779 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7780 return;
7781 }
7782
7783 /* If only once, make sure we're that once */
7784 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7785 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7786
7787 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7788 break;
7789 }
7790 behavior = task->task_exc_guard;
7791 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7792 return;
7793 }
7794 }
7795
7796 /* Raise exception synchronously and see if handler claimed it */
7797 sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);
7798
7799 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7800 /*
7801 * If Synchronous EXC_GUARD delivery was successful then
7802 * kill the process and return, else kill the process
7803 * and deliver the exception via EXC_CORPSE_NOTIFY.
7804 */
7805 if (sync_exception_result == KERN_SUCCESS) {
7806 task_bsdtask_kill(current_task());
7807 } else {
7808 exit_with_guard_exception(current_proc(), code, subcode);
7809 }
7810 } else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
7811 /*
7812 * If the synchronous EXC_GUARD delivery was not successful,
7813 * raise a simulated crash.
7814 */
7815 if (sync_exception_result != KERN_SUCCESS) {
7816 task_violated_guard(code, subcode, NULL, FALSE);
7817 }
7818 }
7819 }
7820
7821 /*
7822 * vm_map_guard_exception:
7823 *
7824 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7825 *
7826 * Right now, we do this when we find nothing mapped, or a
7827 * gap in the mapping when a user address space deallocate
7828 * was requested. We report the address of the first gap found.
7829 */
7830 static void
7831 vm_map_guard_exception(
7832 vm_map_offset_t gap_start,
7833 unsigned reason)
7834 {
7835 mach_exception_code_t code = 0;
7836 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7837 unsigned int target = 0; /* should we pass in pid associated with map? */
7838 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7839 boolean_t fatal = FALSE;
7840
7841 task_t task = current_task_early();
7842
7843 /* Can't deliver exceptions to a NULL task (early boot) or kernel task */
7844 if (task == NULL || task == kernel_task) {
7845 return;
7846 }
7847
7848 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7849 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7850 EXC_GUARD_ENCODE_TARGET(code, target);
7851
7852 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7853 fatal = TRUE;
7854 }
7855 thread_guard_violation(current_thread(), code, subcode, fatal);
7856 }
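
/*
 * Illustrative sketch: vm_map_delete() below reports the first gap it
 * finds in a user deallocation request as
 *
 *	vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
 *
 * which encodes GUARD_TYPE_VIRT_MEMORY and the flavor into "code" and
 * the gap address into "subcode" before thread_guard_violation()
 * queues the AST that virt_memory_guard_ast() above will handle.
 */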
7857
7858 static kern_return_t
7859 vm_map_delete_submap_recurse(
7860 vm_map_t submap,
7861 vm_map_offset_t submap_start,
7862 vm_map_offset_t submap_end)
7863 {
7864 vm_map_entry_t submap_entry;
7865
7866 /*
7867 * Verify that the submap does not contain any "permanent" entries
7868 * within the specified range.
7869 * We do not care about gaps.
7870 */
7871
7872 vm_map_lock(submap);
7873
7874 if (!vm_map_lookup_entry(submap, submap_start, &submap_entry)) {
7875 submap_entry = submap_entry->vme_next;
7876 }
7877
7878 for (;
7879 submap_entry != vm_map_to_entry(submap) &&
7880 submap_entry->vme_start < submap_end;
7881 submap_entry = submap_entry->vme_next) {
7882 if (submap_entry->vme_permanent) {
7883 /* "permanent" entry -> fail */
7884 vm_map_unlock(submap);
7885 return KERN_PROTECTION_FAILURE;
7886 }
7887 }
7888 /* no "permanent" entries in the range -> success */
7889 vm_map_unlock(submap);
7890 return KERN_SUCCESS;
7891 }
7892
7893 __abortlike
7894 static void
7895 __vm_map_delete_misaligned_panic(
7896 vm_map_t map,
7897 vm_map_offset_t start,
7898 vm_map_offset_t end)
7899 {
7900 panic("vm_map_delete(%p,0x%llx,0x%llx): start is not aligned to 0x%x",
7901 map, (uint64_t)start, (uint64_t)end, VM_MAP_PAGE_SIZE(map));
7902 }
7903
7904 __abortlike
7905 static void
7906 __vm_map_delete_failed_panic(
7907 vm_map_t map,
7908 vm_map_offset_t start,
7909 vm_map_offset_t end,
7910 kern_return_t kr)
7911 {
7912 panic("vm_map_delete(%p,0x%llx,0x%llx): failed unexpected with %d",
7913 map, (uint64_t)start, (uint64_t)end, kr);
7914 }
7915
7916 __abortlike
7917 static void
7918 __vm_map_delete_gap_panic(
7919 vm_map_t map,
7920 vm_map_offset_t where,
7921 vm_map_offset_t start,
7922 vm_map_offset_t end)
7923 {
7924 panic("vm_map_delete(%p,0x%llx,0x%llx): no map entry at 0x%llx",
7925 map, (uint64_t)start, (uint64_t)end, (uint64_t)where);
7926 }
7927
7928 __abortlike
7929 static void
7930 __vm_map_delete_permanent_panic(
7931 vm_map_t map,
7932 vm_map_offset_t start,
7933 vm_map_offset_t end,
7934 vm_map_entry_t entry)
7935 {
7936 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7937 "Attempting to remove permanent VM map entry %p [0x%llx:0x%llx]",
7938 map, (uint64_t)start, (uint64_t)end, entry,
7939 (uint64_t)entry->vme_start,
7940 (uint64_t)entry->vme_end);
7941 }
7942
7943 __options_decl(vm_map_delete_state_t, uint32_t, {
7944 VMDS_NONE = 0x0000,
7945
7946 VMDS_FOUND_GAP = 0x0001,
7947 VMDS_GAPS_OK = 0x0002,
7948
7949 VMDS_KERNEL_PMAP = 0x0004,
7950 VMDS_NEEDS_LOOKUP = 0x0008,
7951 VMDS_NEEDS_WAKEUP = 0x0010,
7952 VMDS_KERNEL_KMEMPTR = 0x0020
7953 });
7954
7955 /*
7956 * vm_map_delete: [ internal use only ]
7957 *
7958 * Deallocates the given address range from the target map.
7959 * Removes all user wirings. Unwires one kernel wiring if
7960 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7961 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7962 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7963 *
7964 *
7965 * When the map is a kernel map, then any error in removing mappings
7966 * will lead to a panic so that clients do not have to repeat the panic
7967 * code at each call site. If VM_MAP_REMOVE_INTERRUPTIBLE
7968 * is also passed, then KERN_ABORTED will not lead to a panic.
7969 *
7970 * This routine is called with map locked and leaves map locked.
7971 */
7972 static kmem_return_t
7973 vm_map_delete(
7974 vm_map_t map,
7975 vm_map_offset_t start,
7976 vm_map_offset_t end,
7977 vmr_flags_t flags,
7978 kmem_guard_t guard,
7979 vm_map_zap_t zap_list)
7980 {
7981 vm_map_entry_t entry, next;
7982 int interruptible;
7983 vm_map_offset_t gap_start = 0;
7984 vm_map_offset_t clear_in_transition_end = 0;
7985 __unused vm_map_offset_t save_start = start;
7986 __unused vm_map_offset_t save_end = end;
7987 vm_map_delete_state_t state = VMDS_NONE;
7988 kmem_return_t ret = { };
7989 vm_map_range_id_t range_id = 0;
7990 struct kmem_page_meta *meta = NULL;
7991 uint32_t size_idx, slot_idx;
7992 struct mach_vm_range slot;
7993
7994 if (vm_map_pmap(map) == kernel_pmap) {
7995 state |= VMDS_KERNEL_PMAP;
7996 range_id = kmem_addr_get_range(start, end - start);
7997 if (kmem_is_ptr_range(range_id)) {
7998 state |= VMDS_KERNEL_KMEMPTR;
7999 slot_idx = kmem_addr_get_slot_idx(start, end, range_id, &meta,
8000 &size_idx, &slot);
8001 }
8002 }
8003
8004 if (map->terminated || os_ref_get_count_raw(&map->map_refcnt) == 0) {
8005 state |= VMDS_GAPS_OK;
8006 }
8007
8008 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
8009 THREAD_ABORTSAFE : THREAD_UNINT;
8010
8011 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) == 0 &&
8012 (start & VM_MAP_PAGE_MASK(map))) {
8013 __vm_map_delete_misaligned_panic(map, start, end);
8014 }
8015
8016 if ((state & VMDS_GAPS_OK) == 0) {
8017 /*
8018 * If the map isn't terminated then all deletions must have
8019 * no gaps, and be within the [min, max) of the map.
8020 *
8021 * We got here without VM_MAP_RANGE_CHECK() being called,
8022 * and hence must validate bounds manually.
8023 *
8024 * It is worth noting that because vm_deallocate() will
8025 * round_page() the deallocation size, it's possible for "end"
8026 * to be 0 here due to overflow. We hence must treat it as being
8027 * beyond vm_map_max(map).
8028 *
8029 * Similarly, end < start means some wrap-around happened,
8030 * which should cause an error or panic.
8031 */
8032 if (end == 0 || end > vm_map_max(map)) {
8033 state |= VMDS_FOUND_GAP;
8034 gap_start = vm_map_max(map);
8035 if (state & VMDS_KERNEL_PMAP) {
8036 __vm_map_delete_gap_panic(map,
8037 gap_start, start, end);
8038 }
8039 goto out;
8040 }
8041
8042 if (end < start) {
8043 if (state & VMDS_KERNEL_PMAP) {
8044 __vm_map_delete_gap_panic(map,
8045 vm_map_max(map), start, end);
8046 }
8047 ret.kmr_return = KERN_INVALID_ARGUMENT;
8048 goto out;
8049 }
8050
8051 if (start < vm_map_min(map)) {
8052 state |= VMDS_FOUND_GAP;
8053 gap_start = start;
8054 if (state & VMDS_KERNEL_PMAP) {
8055 __vm_map_delete_gap_panic(map,
8056 gap_start, start, end);
8057 }
8058 goto out;
8059 }
8060 } else {
8061 /*
8062 * If the map is terminated, we must accept start/end
8063 * being beyond the boundaries of the map as this is
8064 * how some of the mappings like commpage mappings
8065 * can be destroyed (they're outside of those bounds).
8066 *
8067 * end < start is still something we can't cope with,
8068 * so just bail.
8069 */
8070 if (end < start) {
8071 goto out;
8072 }
8073 }
8074
8075
8076 /*
8077 * Find the start of the region.
8078 *
8079 * If in a superpage, extend the range
8080 * to include the start of the mapping.
8081 */
8082 while (vm_map_lookup_entry_or_next(map, start, &entry)) {
8083 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
8084 start = SUPERPAGE_ROUND_DOWN(start);
8085 } else {
8086 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8087 break;
8088 }
8089 }
8090
8091 if (entry->superpage_size) {
8092 end = SUPERPAGE_ROUND_UP(end);
8093 }
8094
8095 /*
8096 * Step through all entries in this region
8097 */
8098 for (vm_map_offset_t s = start; s < end;) {
8099 /*
8100 * At this point, we have deleted all the memory entries
8101 * in [start, s) and are proceeding with the [s, end) range.
8102 *
8103 * This loop might drop the map lock, and it is possible that
8104 * some memory was already reallocated within [start, s)
8105 * and we don't want to mess with those entries.
8106 *
8107 * Some of those entries could even have been re-assembled
8108 * with an entry after "s" (in vm_map_simplify_entry()), so
8109 * we may have to vm_map_clip_start() again.
8110 *
8111 * When clear_in_transition_end is set, we had marked
8112 * [start, clear_in_transition_end) as "in_transition"
8113 * during a previous iteration and we need to clear it.
8114 */
8115
8116 /*
8117 * Step 1: If needed (because we dropped locks),
8118 * lookup the entry again.
8119 *
8120 * If we're coming back from unwiring (Step 5),
8121 * we also need to mark the entries as no longer
8122 * in transition after that.
8123 */
8124
8125 if (state & VMDS_NEEDS_LOOKUP) {
8126 state &= ~VMDS_NEEDS_LOOKUP;
8127
8128 if (vm_map_lookup_entry_or_next(map, s, &entry)) {
8129 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8130 }
8131
8132 if (state & VMDS_KERNEL_KMEMPTR) {
8133 kmem_validate_slot(s, meta, size_idx, slot_idx);
8134 }
8135 }
8136
8137 if (clear_in_transition_end) {
8138 for (vm_map_entry_t it = entry;
8139 it != vm_map_to_entry(map) &&
8140 it->vme_start < clear_in_transition_end;
8141 it = it->vme_next) {
8142 assert(it->in_transition);
8143 it->in_transition = FALSE;
8144 if (it->needs_wakeup) {
8145 it->needs_wakeup = FALSE;
8146 state |= VMDS_NEEDS_WAKEUP;
8147 }
8148 }
8149
8150 clear_in_transition_end = 0;
8151 }
8152
8153
8154 /*
8155 * Step 2: Perform various policy checks
8156 * before we do _anything_ to this entry.
8157 */
8158
8159 if (entry == vm_map_to_entry(map) || s < entry->vme_start) {
8160 if (state & (VMDS_GAPS_OK | VMDS_FOUND_GAP)) {
8161 /*
8162 * Either we found a gap already,
8163 * or we are tearing down a map,
8164 * keep going.
8165 */
8166 } else if (state & VMDS_KERNEL_PMAP) {
8167 __vm_map_delete_gap_panic(map, s, start, end);
8168 } else if (s < end) {
8169 state |= VMDS_FOUND_GAP;
8170 gap_start = s;
8171 }
8172
8173 if (entry == vm_map_to_entry(map) ||
8174 end <= entry->vme_start) {
8175 break;
8176 }
8177
8178 s = entry->vme_start;
8179 }
8180
8181 if (state & VMDS_KERNEL_PMAP) {
8182 /*
8183 * In the kernel map and its submaps,
8184 * permanent entries never die, even
8185 * if VM_MAP_REMOVE_IMMUTABLE is passed.
8186 */
8187 if (entry->vme_permanent) {
8188 __vm_map_delete_permanent_panic(map, start, end, entry);
8189 }
8190
8191 if (flags & VM_MAP_REMOVE_GUESS_SIZE) {
8192 end = entry->vme_end;
8193 flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
8194 }
8195
8196 /*
8197 * In the kernel map and its submaps,
8198 * the removal of an atomic/guarded entry is strict.
8199 *
8200 * An atomic entry is processed only if it was
8201 * specifically targeted.
8202 *
8203 * We might have deleted non-atomic entries before
8204 * we reach this point, however...
8205 */
8206 kmem_entry_validate_guard(map, entry,
8207 start, end - start, guard);
8208 }
8209
8210 /*
8211 * Step 2.1: handle "permanent" and "submap" entries
8212 * *before* clipping to avoid triggering some unnecessary
8213 * un-nesting of the shared region.
8214 */
8215 if (entry->vme_permanent && entry->is_sub_map) {
8216 // printf("FBDP %s:%d permanent submap...\n", __FUNCTION__, __LINE__);
8217 /*
8218 * Un-mapping a "permanent" mapping of a user-space
8219 * submap is not allowed unless...
8220 */
8221 if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8222 /*
8223 * a. explicitly requested by the kernel caller.
8224 */
8225 // printf("FBDP %s:%d flags & REMOVE_IMMUTABLE\n", __FUNCTION__, __LINE__);
8226 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE_CODE) &&
8227 developer_mode_state()) {
8228 /*
8229 * b. we're in "developer" mode (for
8230 * breakpoints, dtrace probes, ...).
8231 */
8232 // printf("FBDP %s:%d flags & REMOVE_IMMUTABLE_CODE\n", __FUNCTION__, __LINE__);
8233 } else if (map->terminated) {
8234 /*
8235 * c. this is the final address space cleanup.
8236 */
8237 // printf("FBDP %s:%d map->terminated\n", __FUNCTION__, __LINE__);
8238 } else {
8239 vm_map_offset_t submap_start, submap_end;
8240 kern_return_t submap_kr;
8241
8242 /*
8243 * Check if there are any "permanent" mappings
8244 * in this range in the submap.
8245 */
8246 if (entry->in_transition) {
8247 /* can that even happen ? */
8248 goto in_transition;
8249 }
8250 /* compute the clipped range in the submap */
8251 submap_start = s - entry->vme_start;
8252 submap_start += VME_OFFSET(entry);
8253 submap_end = end - entry->vme_start;
8254 submap_end += VME_OFFSET(entry);
8255 submap_kr = vm_map_delete_submap_recurse(
8256 VME_SUBMAP(entry),
8257 submap_start,
8258 submap_end);
8259 if (submap_kr != KERN_SUCCESS) {
8260 /*
8261 * There are some "permanent" mappings
8262 * in the submap: we are not allowed
8263 * to remove this range.
8264 */
8265 printf("%d[%s] removing permanent submap entry "
8266 "%p [0x%llx:0x%llx] prot 0x%x/0x%x -> KERN_PROT_FAILURE\n",
8267 proc_selfpid(),
8268 (get_bsdtask_info(current_task())
8269 ? proc_name_address(get_bsdtask_info(current_task()))
8270 : "?"), entry,
8271 (uint64_t)entry->vme_start,
8272 (uint64_t)entry->vme_end,
8273 entry->protection,
8274 entry->max_protection);
8275 DTRACE_VM6(vm_map_delete_permanent_deny_submap,
8276 vm_map_entry_t, entry,
8277 vm_map_offset_t, entry->vme_start,
8278 vm_map_offset_t, entry->vme_end,
8279 vm_prot_t, entry->protection,
8280 vm_prot_t, entry->max_protection,
8281 int, VME_ALIAS(entry));
8282 ret.kmr_return = KERN_PROTECTION_FAILURE;
8283 goto out;
8284 }
8285 /* no permanent mappings: proceed */
8286 }
8287 }
8288
8289 /*
8290 * Step 3: Perform any clipping needed.
8291 *
8292 * After this, "entry" starts at "s", ends before "end"
8293 */
8294
8295 if (entry->vme_start < s) {
8296 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8297 entry->map_aligned &&
8298 !VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))) {
8299 /*
8300 * The entry will no longer be map-aligned
8301 * after clipping and the caller said it's OK.
8302 */
8303 entry->map_aligned = FALSE;
8304 }
8305 vm_map_clip_start(map, entry, s);
8306 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8307 }
8308
8309 if (end < entry->vme_end) {
8310 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8311 entry->map_aligned &&
8312 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
8313 /*
8314 * The entry will no longer be map-aligned
8315 * after clipping and the caller said it's OK.
8316 */
8317 entry->map_aligned = FALSE;
8318 }
8319 vm_map_clip_end(map, entry, end);
8320 }
8321
8322 if (entry->vme_permanent && entry->is_sub_map) {
8323 /*
8324 * We already went through step 2.1 which did not deny
8325 * the removal of this "permanent" and "is_sub_map"
8326 * entry.
8327 * Now that we've clipped what we actually want to
8328 * delete, undo the "permanent" part to allow the
8329 * removal to proceed.
8330 */
8331 DTRACE_VM6(vm_map_delete_permanent_allow_submap,
8332 vm_map_entry_t, entry,
8333 vm_map_offset_t, entry->vme_start,
8334 vm_map_offset_t, entry->vme_end,
8335 vm_prot_t, entry->protection,
8336 vm_prot_t, entry->max_protection,
8337 int, VME_ALIAS(entry));
8338 entry->vme_permanent = false;
8339 }
8340
8341 assert(s == entry->vme_start);
8342 assert(entry->vme_end <= end);
8343
8344
8345 /*
8346 * Step 4: If the entry is in flux, wait for this to resolve.
8347 */
8348
8349 if (entry->in_transition) {
8350 wait_result_t wait_result;
8351
8352 in_transition:
8353 /*
8354 * Another thread is wiring/unwiring this entry.
8355 * Let the other thread know we are waiting.
8356 */
8357
8358 entry->needs_wakeup = TRUE;
8359
8360 /*
8361 * wake up anybody waiting on entries that we have
8362 * already unwired/deleted.
8363 */
8364 if (state & VMDS_NEEDS_WAKEUP) {
8365 vm_map_entry_wakeup(map);
8366 state &= ~VMDS_NEEDS_WAKEUP;
8367 }
8368
8369 wait_result = vm_map_entry_wait(map, interruptible);
8370
8371 if (interruptible &&
8372 wait_result == THREAD_INTERRUPTED) {
8373 /*
8374 * We do not clear the needs_wakeup flag,
8375 * since we cannot tell if we were the only one.
8376 */
8377 ret.kmr_return = KERN_ABORTED;
8378 return ret;
8379 }
8380
8381 /*
8382 * The entry could have been clipped or it
8383 * may not exist anymore. Look it up again.
8384 */
8385 state |= VMDS_NEEDS_LOOKUP;
8386 continue;
8387 }
8388
8389
8390 /*
8391 * Step 5: Handle wiring
8392 */
8393
8394 if (entry->wired_count) {
8395 struct vm_map_entry tmp_entry;
8396 boolean_t user_wire;
8397 unsigned int last_timestamp;
8398
8399 user_wire = entry->user_wired_count > 0;
8400
8401 /*
8402 * Remove a kernel wiring if requested
8403 */
8404 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8405 entry->wired_count--;
8406 }
8407
8408 /*
8409 * Remove all user wirings for proper accounting
8410 */
8411 while (entry->user_wired_count) {
8412 subtract_wire_counts(map, entry, user_wire);
8413 }
8414
8415 /*
8416 * All our DMA I/O operations in IOKit are currently
8417 * done by wiring through the map entries of the task
8418 * requesting the I/O.
8419 *
8420 * Because of this, we must always wait for kernel wirings
8421 * to go away on the entries before deleting them.
8422 *
8423 * Any caller who wants to actually remove a kernel wiring
8424 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
8425 * properly remove one wiring instead of blasting through
8426 * them all.
8427 */
8428 if (entry->wired_count != 0) {
8429 assert(map != kernel_map);
8430 /*
8431 * Cannot continue. Typical case is when
8432 * a user thread has physical I/O pending
8433 * on this page. Either wait for the
8434 * kernel wiring to go away or return an
8435 * error.
8436 */
8437 wait_result_t wait_result;
8438
8439 entry->needs_wakeup = TRUE;
8440 wait_result = vm_map_entry_wait(map,
8441 interruptible);
8442
8443 if (interruptible &&
8444 wait_result == THREAD_INTERRUPTED) {
8445 /*
8446 * We do not clear the
8447 * needs_wakeup flag, since we
8448 * cannot tell if we were the
8449 * only one.
8450 */
8451 ret.kmr_return = KERN_ABORTED;
8452 return ret;
8453 }
8454
8455
8456 /*
8457 * The entry could have been clipped or
8458 * it may not exist anymore. Look it
8459 * up again.
8460 */
8461 state |= VMDS_NEEDS_LOOKUP;
8462 continue;
8463 }
8464
8465 /*
8466 * We can unlock the map now.
8467 *
8468 * The entry might be split once we unlock the map,
8469 * but we need the range as defined by this entry
8470 * to be stable. So we must make a local copy.
8471 *
8472 * The underlying objects do not change during clips,
8473 * and the in_transition state guarantees existence
8474 * of the entry.
8475 */
8476 last_timestamp = map->timestamp;
8477 entry->in_transition = TRUE;
8478 tmp_entry = *entry;
8479 vm_map_unlock(map);
8480
8481 if (tmp_entry.is_sub_map) {
8482 vm_map_t sub_map;
8483 vm_map_offset_t sub_start, sub_end;
8484 pmap_t pmap;
8485 vm_map_offset_t pmap_addr;
8486
8487
8488 sub_map = VME_SUBMAP(&tmp_entry);
8489 sub_start = VME_OFFSET(&tmp_entry);
8490 sub_end = sub_start + (tmp_entry.vme_end -
8491 tmp_entry.vme_start);
8492 if (tmp_entry.use_pmap) {
8493 pmap = sub_map->pmap;
8494 pmap_addr = tmp_entry.vme_start;
8495 } else {
8496 pmap = map->pmap;
8497 pmap_addr = tmp_entry.vme_start;
8498 }
8499 (void) vm_map_unwire_nested(sub_map,
8500 sub_start, sub_end,
8501 user_wire,
8502 pmap, pmap_addr);
8503 } else {
8504 vm_map_offset_t entry_end = tmp_entry.vme_end;
8505 vm_map_offset_t max_end;
8506
8507 if (flags & VM_MAP_REMOVE_NOKUNWIRE_LAST) {
8508 max_end = end - VM_MAP_PAGE_SIZE(map);
8509 if (entry_end > max_end) {
8510 entry_end = max_end;
8511 }
8512 }
8513
8514 if (tmp_entry.vme_kernel_object) {
8515 pmap_protect_options(
8516 map->pmap,
8517 tmp_entry.vme_start,
8518 entry_end,
8519 VM_PROT_NONE,
8520 PMAP_OPTIONS_REMOVE,
8521 NULL);
8522 }
8523 vm_fault_unwire(map, &tmp_entry,
8524 tmp_entry.vme_kernel_object, map->pmap,
8525 tmp_entry.vme_start, entry_end);
8526 }
8527
8528 vm_map_lock(map);
8529
8530 /*
8531 * Unwiring happened, we can now go back to deleting
8532 * them (after we clear the in_transition bit for the range).
8533 */
8534 if (last_timestamp + 1 != map->timestamp) {
8535 state |= VMDS_NEEDS_LOOKUP;
8536 }
8537 clear_in_transition_end = tmp_entry.vme_end;
8538 continue;
8539 }
8540
8541 assert(entry->wired_count == 0);
8542 assert(entry->user_wired_count == 0);
8543
8544
8545 /*
8546 * Step 6: Entry is unwired and ready for us to delete !
8547 */
8548
8549 if (!entry->vme_permanent) {
8550 /*
8551 * Typical case: the entry really shouldn't be permanent
8552 */
8553 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE_CODE) &&
8554 (entry->protection & VM_PROT_EXECUTE) &&
8555 developer_mode_state()) {
8556 /*
8557 * Allow debuggers to undo executable mappings
8558 * when developer mode is on.
8559 */
8560 #if 0
8561 printf("FBDP %d[%s] removing permanent executable entry "
8562 "%p [0x%llx:0x%llx] prot 0x%x/0x%x\n",
8563 proc_selfpid(),
8564 (current_task()->bsd_info
8565 ? proc_name_address(current_task()->bsd_info)
8566 : "?"), entry,
8567 (uint64_t)entry->vme_start,
8568 (uint64_t)entry->vme_end,
8569 entry->protection,
8570 entry->max_protection);
8571 #endif
8572 entry->vme_permanent = FALSE;
8573 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE) || map->terminated) {
8574 #if 0
8575 printf("FBDP %d[%s] removing permanent entry "
8576 "%p [0x%llx:0x%llx] prot 0x%x/0x%x\n",
8577 proc_selfpid(),
8578 (current_task()->bsd_info
8579 ? proc_name_address(current_task()->bsd_info)
8580 : "?"), entry,
8581 (uint64_t)entry->vme_start,
8582 (uint64_t)entry->vme_end,
8583 entry->protection,
8584 entry->max_protection);
8585 #endif
8586 entry->vme_permanent = FALSE;
8587 #if CODE_SIGNING_MONITOR
8588 } else if ((entry->protection & VM_PROT_EXECUTE) && !csm_enabled()) {
8589 entry->vme_permanent = FALSE;
8590
8591 printf("%d[%s] %s(0x%llx,0x%llx): "
8592 "code signing monitor disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
8593 "prot 0x%x/0x%x\n",
8594 proc_selfpid(),
8595 (get_bsdtask_info(current_task())
8596 ? proc_name_address(get_bsdtask_info(current_task()))
8597 : "?"),
8598 __FUNCTION__,
8599 (uint64_t)start,
8600 (uint64_t)end,
8601 (uint64_t)entry->vme_start,
8602 (uint64_t)entry->vme_end,
8603 entry->protection,
8604 entry->max_protection);
8605 #endif
8606 } else {
8607 DTRACE_VM6(vm_map_delete_permanent,
8608 vm_map_entry_t, entry,
8609 vm_map_offset_t, entry->vme_start,
8610 vm_map_offset_t, entry->vme_end,
8611 vm_prot_t, entry->protection,
8612 vm_prot_t, entry->max_protection,
8613 int, VME_ALIAS(entry));
8614 }
8615
8616 if (entry->is_sub_map) {
8617 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8618 "map %p (%d) entry %p submap %p (%d)\n",
8619 map, VM_MAP_PAGE_SHIFT(map), entry,
8620 VME_SUBMAP(entry),
8621 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8622 if (entry->use_pmap) {
8623 #ifndef NO_NESTED_PMAP
8624 int pmap_flags;
8625
8626 if (map->terminated) {
8627 /*
8628 * This is the final cleanup of the
8629 * address space being terminated.
8630 * No new mappings are expected and
8631 * we don't really need to unnest the
8632 * shared region (and lose the "global"
8633 * pmap mappings, if applicable).
8634 *
8635 * Tell the pmap layer that we're
8636 * "clean" wrt nesting.
8637 */
8638 pmap_flags = PMAP_UNNEST_CLEAN;
8639 } else {
8640 /*
8641 * We're unmapping part of the nested
8642 * shared region, so we can't keep the
8643 * nested pmap.
8644 */
8645 pmap_flags = 0;
8646 }
8647 pmap_unnest_options(
8648 map->pmap,
8649 (addr64_t)entry->vme_start,
8650 entry->vme_end - entry->vme_start,
8651 pmap_flags);
8652 #endif /* NO_NESTED_PMAP */
8653 if (map->mapped_in_other_pmaps &&
8654 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8655 /* clean up parent map/maps */
8656 vm_map_submap_pmap_clean(
8657 map, entry->vme_start,
8658 entry->vme_end,
8659 VME_SUBMAP(entry),
8660 VME_OFFSET(entry));
8661 }
8662 } else {
8663 vm_map_submap_pmap_clean(
8664 map, entry->vme_start, entry->vme_end,
8665 VME_SUBMAP(entry),
8666 VME_OFFSET(entry));
8667 }
8668 } else if (entry->vme_kernel_object ||
8669 VME_OBJECT(entry) == compressor_object) {
8670 /*
8671 * nothing to do
8672 */
8673 } else if (map->mapped_in_other_pmaps &&
8674 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8675 vm_object_pmap_protect_options(
8676 VME_OBJECT(entry), VME_OFFSET(entry),
8677 entry->vme_end - entry->vme_start,
8678 PMAP_NULL,
8679 PAGE_SIZE,
8680 entry->vme_start,
8681 VM_PROT_NONE,
8682 PMAP_OPTIONS_REMOVE);
8683 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8684 (state & VMDS_KERNEL_PMAP)) {
8685 /* Remove translations associated
8686 * with this range unless the entry
8687 * does not have an object, or
8688 * it's the kernel map or a descendant
8689 * since the platform could potentially
8690 * create "backdoor" mappings invisible
8691 * to the VM. It is expected that
8692 * objectless, non-kernel ranges
8693 * do not have such VM invisible
8694 * translations.
8695 */
8696 pmap_remove_options(map->pmap,
8697 (addr64_t)entry->vme_start,
8698 (addr64_t)entry->vme_end,
8699 PMAP_OPTIONS_REMOVE);
8700 }
8701
8702 #if DEBUG
8703 /*
8704 * All pmap mappings for this map entry must have been
8705 * cleared by now.
8706 */
8707 assert(pmap_is_empty(map->pmap,
8708 entry->vme_start,
8709 entry->vme_end));
8710 #endif /* DEBUG */
8711
8712 if (entry->iokit_acct) {
8713 /* alternate accounting */
8714 DTRACE_VM4(vm_map_iokit_unmapped_region,
8715 vm_map_t, map,
8716 vm_map_offset_t, entry->vme_start,
8717 vm_map_offset_t, entry->vme_end,
8718 int, VME_ALIAS(entry));
8719 vm_map_iokit_unmapped_region(map,
8720 (entry->vme_end -
8721 entry->vme_start));
8722 entry->iokit_acct = FALSE;
8723 entry->use_pmap = FALSE;
8724 }
8725
8726 /* move "s" forward */
8727 s = entry->vme_end;
8728 next = entry->vme_next;
8729 if (!entry->map_aligned) {
8730 vm_map_offset_t rounded_s;
8731
8732 /*
8733 * Skip artificial gap due to mis-aligned entry
8734 * on devices with a page size smaller than the
8735 * map's page size (i.e. 16k task on a 4k device).
8736 */
8737 rounded_s = VM_MAP_ROUND_PAGE(s, VM_MAP_PAGE_MASK(map));
8738 if (next == vm_map_to_entry(map)) {
8739 s = rounded_s;
8740 } else if (s < rounded_s) {
8741 s = MIN(rounded_s, next->vme_start);
8742 }
8743 }
8744 ret.kmr_size += s - entry->vme_start;
8745
8746 if (entry->vme_permanent) {
8747 /*
8748 * A permanent entry can not be removed, so leave it
8749 * in place but remove all access permissions.
8750 */
8751 if (!entry->csm_associated) {
8752 printf("%s:%d %d[%s] map %p entry %p [ 0x%llx - 0x%llx ] submap %d prot 0x%x/0x%x -> 0/0\n",
8753 __FUNCTION__, __LINE__,
8754 proc_selfpid(),
8755 (get_bsdtask_info(current_task())
8756 ? proc_name_address(get_bsdtask_info(current_task()))
8757 : "?"),
8758 map,
8759 entry,
8760 (uint64_t)entry->vme_start,
8761 (uint64_t)entry->vme_end,
8762 entry->is_sub_map,
8763 entry->protection,
8764 entry->max_protection);
8765 }
8766 DTRACE_VM6(vm_map_delete_permanent_prot_none,
8767 vm_map_entry_t, entry,
8768 vm_map_offset_t, entry->vme_start,
8769 vm_map_offset_t, entry->vme_end,
8770 vm_prot_t, entry->protection,
8771 vm_prot_t, entry->max_protection,
8772 int, VME_ALIAS(entry));
8773 entry->protection = VM_PROT_NONE;
8774 entry->max_protection = VM_PROT_NONE;
8775 } else {
8776 vm_map_entry_zap(map, entry, zap_list);
8777 }
8778
8779 entry = next;
8780 next = VM_MAP_ENTRY_NULL;
8781
8782 if ((flags & VM_MAP_REMOVE_NO_YIELD) == 0 && s < end) {
8783 unsigned int last_timestamp = map->timestamp++;
8784
8785 if (lck_rw_lock_yield_exclusive(&map->lock,
8786 LCK_RW_YIELD_ANY_WAITER)) {
8787 if (last_timestamp != map->timestamp + 1) {
8788 state |= VMDS_NEEDS_LOOKUP;
8789 }
8790 } else {
8791 /* we didn't yield, undo our change */
8792 map->timestamp--;
8793 }
8794 }
8795 }
8796
8797 if (map->wait_for_space) {
8798 thread_wakeup((event_t) map);
8799 }
8800
8801 if (state & VMDS_NEEDS_WAKEUP) {
8802 vm_map_entry_wakeup(map);
8803 }
8804
8805 out:
8806 if ((state & VMDS_KERNEL_PMAP) && ret.kmr_return) {
8807 __vm_map_delete_failed_panic(map, start, end, ret.kmr_return);
8808 }
8809
8810 if (state & VMDS_KERNEL_KMEMPTR) {
8811 kmem_free_space(start, end, range_id, &slot);
8812 }
8813
8814 if (state & VMDS_FOUND_GAP) {
8815 DTRACE_VM3(kern_vm_deallocate_gap,
8816 vm_map_offset_t, gap_start,
8817 vm_map_offset_t, save_start,
8818 vm_map_offset_t, save_end);
8819 if (flags & VM_MAP_REMOVE_GAPS_FAIL) {
8820 ret.kmr_return = KERN_INVALID_VALUE;
8821 } else {
8822 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8823 }
8824 }
8825
8826 return ret;
8827 }
8828
8829 kmem_return_t
8830 vm_map_remove_and_unlock(
8831 vm_map_t map,
8832 vm_map_offset_t start,
8833 vm_map_offset_t end,
8834 vmr_flags_t flags,
8835 kmem_guard_t guard)
8836 {
8837 kmem_return_t ret;
8838 VM_MAP_ZAP_DECLARE(zap);
8839
8840 ret = vm_map_delete(map, start, end, flags, guard, &zap);
8841 vm_map_unlock(map);
8842
8843 vm_map_zap_dispose(&zap);
8844
8845 return ret;
8846 }
8847
8848 /*
8849 * vm_map_remove_guard:
8850 *
8851 * Remove the given address range from the target map.
8852 * This is the exported form of vm_map_delete.
8853 */
8854 kmem_return_t
8855 vm_map_remove_guard(
8856 vm_map_t map,
8857 vm_map_offset_t start,
8858 vm_map_offset_t end,
8859 vmr_flags_t flags,
8860 kmem_guard_t guard)
8861 {
8862 vm_map_lock(map);
8863 return vm_map_remove_and_unlock(map, start, end, flags, guard);
8864 }
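
/*
 * Illustrative sketch (hedged example, not a real call site): a caller
 * with no guard requirements removes a range through the unguarded
 * path, e.g.
 *
 *	kmem_return_t kmr;
 *
 *	kmr = vm_map_remove_guard(map, start, end,
 *	    VM_MAP_REMOVE_NO_FLAGS, KMEM_GUARD_NONE);
 *
 * vm_map_terminate() below follows the same pattern through
 * vm_map_remove_and_unlock(), applied to the whole
 * [min_offset, max_offset) range of a dying map.
 */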
8865
8866 /*
8867 * vm_map_terminate:
8868 *
8869 * Clean out a task's map.
8870 */
8871 kern_return_t
8872 vm_map_terminate(
8873 vm_map_t map)
8874 {
8875 vm_map_lock(map);
8876 map->terminated = TRUE;
8877 vm_map_disable_hole_optimization(map);
8878 (void)vm_map_remove_and_unlock(map, map->min_offset, map->max_offset,
8879 VM_MAP_REMOVE_NO_FLAGS, KMEM_GUARD_NONE);
8880 return KERN_SUCCESS;
8881 }
8882
8883 /*
8884 * Routine: vm_map_copy_allocate
8885 *
8886 * Description:
8887 * Allocates and initializes a map copy object.
8888 */
8889 static vm_map_copy_t
8890 vm_map_copy_allocate(uint16_t type)
8891 {
8892 vm_map_copy_t new_copy;
8893
8894 new_copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO);
8895 new_copy->type = type;
8896 if (type == VM_MAP_COPY_ENTRY_LIST) {
8897 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8898 vm_map_store_init(&new_copy->cpy_hdr);
8899 }
8900 return new_copy;
8901 }
8902
8903 /*
8904 * Routine: vm_map_copy_discard
8905 *
8906 * Description:
8907 * Dispose of a map copy object (returned by
8908 * vm_map_copyin).
8909 */
8910 void
8911 vm_map_copy_discard(
8912 vm_map_copy_t copy)
8913 {
8914 if (copy == VM_MAP_COPY_NULL) {
8915 return;
8916 }
8917
8918 /*
8919 * Assert that the vm_map_copy is coming from the right
8920 * zone and hasn't been forged
8921 */
8922 vm_map_copy_require(copy);
8923
8924 switch (copy->type) {
8925 case VM_MAP_COPY_ENTRY_LIST:
8926 while (vm_map_copy_first_entry(copy) !=
8927 vm_map_copy_to_entry(copy)) {
8928 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8929
8930 vm_map_copy_entry_unlink(copy, entry);
8931 if (entry->is_sub_map) {
8932 vm_map_deallocate(VME_SUBMAP(entry));
8933 } else {
8934 vm_object_deallocate(VME_OBJECT(entry));
8935 }
8936 vm_map_copy_entry_dispose(entry);
8937 }
8938 break;
8939 case VM_MAP_COPY_KERNEL_BUFFER:
8940
8941 /*
8942 * The vm_map_copy_t and possibly the data buffer were
8943 * allocated by a single call to kalloc_data(), i.e. the
8944 * vm_map_copy_t was not allocated out of the zone.
8945 */
8946 if (copy->size > msg_ool_size_small || copy->offset) {
8947 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8948 (long long)copy->size, (long long)copy->offset);
8949 }
8950 kfree_data(copy->cpy_kdata, copy->size);
8951 }
8952 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
8953 }
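
/*
 * Illustrative sketch (assumed typical caller, not original code): a
 * copy object produced by vm_map_copyin() that ends up not being
 * consumed must be discarded by its owner:
 *
 *	vm_map_copy_t copy;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS && later_step_failed) {	-- hypothetical
 *		vm_map_copy_discard(copy);		-- still ours
 *	}
 */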
8954
8955 #if XNU_PLATFORM_MacOSX
8956
8957 /*
8958 * Routine: vm_map_copy_copy
8959 *
8960 * Description:
8961 * Move the information in a map copy object to
8962 * a new map copy object, leaving the old one
8963 * empty.
8964 *
8965 * This is used by kernel routines that need
8966 * to look at out-of-line data (in copyin form)
8967 * before deciding whether to return SUCCESS.
8968 * If the routine returns FAILURE, the original
8969 * copy object will be deallocated; therefore,
8970 * these routines must make a copy of the copy
8971 * object and leave the original empty so that
8972 * deallocation will not fail.
8973 */
8974 vm_map_copy_t
8975 vm_map_copy_copy(
8976 vm_map_copy_t copy)
8977 {
8978 vm_map_copy_t new_copy;
8979
8980 if (copy == VM_MAP_COPY_NULL) {
8981 return VM_MAP_COPY_NULL;
8982 }
8983
8984 /*
8985 * Assert that the vm_map_copy is coming from the right
8986 * zone and hasn't been forged
8987 */
8988 vm_map_copy_require(copy);
8989
8990 /*
8991 * Allocate a new copy object, and copy the information
8992 * from the old one into it.
8993 */
8994
8995 new_copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO | Z_NOFAIL);
8996 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8997 #if __has_feature(ptrauth_calls)
8998 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8999 new_copy->cpy_kdata = copy->cpy_kdata;
9000 }
9001 #endif
9002
9003 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
9004 /*
9005 * The links in the entry chain must be
9006 * changed to point to the new copy object.
9007 */
9008 vm_map_copy_first_entry(copy)->vme_prev
9009 = vm_map_copy_to_entry(new_copy);
9010 vm_map_copy_last_entry(copy)->vme_next
9011 = vm_map_copy_to_entry(new_copy);
9012 }
9013
9014 /*
9015 * Change the old copy object into one that contains
9016 * nothing to be deallocated.
9017 */
9018 bzero(copy, sizeof(struct vm_map_copy));
9019 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
9020
9021 /*
9022 * Return the new object.
9023 */
9024 return new_copy;
9025 }
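
/*
 * Illustrative sketch of the pattern described above (hypothetical
 * caller): keep a private copy of the copy object so that the failure
 * path can still deallocate the original safely:
 *
 *	vm_map_copy_t shadow = vm_map_copy_copy(copy);
 *
 *	kr = examine_ool_data(shadow);		-- hypothetical helper
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(shadow);
 *		return kr;	-- caller frees the now-empty original
 *	}
 */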
9026
9027 #endif /* XNU_PLATFORM_MacOSX */
9028
9029 static boolean_t
9030 vm_map_entry_is_overwritable(
9031 vm_map_t dst_map __unused,
9032 vm_map_entry_t entry)
9033 {
9034 if (!(entry->protection & VM_PROT_WRITE)) {
9035 /* can't overwrite if not writable */
9036 return FALSE;
9037 }
9038 #if !__x86_64__
9039 if (entry->used_for_jit &&
9040 vm_map_cs_enforcement(dst_map) &&
9041 !dst_map->cs_debugged) {
9042 /*
9043 * Can't overwrite a JIT region while cs_enforced
9044 * and not cs_debugged.
9045 */
9046 return FALSE;
9047 }
9048
9049 #if __arm64e__
9050 /* Do not allow overwrite HW assisted TPRO entries */
9051 if (entry->used_for_tpro) {
9052 return FALSE;
9053 }
9054 #endif /* __arm64e__ */
9055
9056 if (entry->vme_permanent) {
9057 if (entry->is_sub_map) {
9058 /*
9059 * We can't tell if the submap contains "permanent"
9060 * entries within the range targeted by the caller.
9061 * The caller will have to check for that with
9062 * vm_map_overwrite_submap_recurse() for example.
9063 */
9064 } else {
9065 /*
9066 * Do not allow overwriting of a "permanent"
9067 * entry.
9068 */
9069 DTRACE_VM6(vm_map_delete_permanent_deny_overwrite,
9070 vm_map_entry_t, entry,
9071 vm_map_offset_t, entry->vme_start,
9072 vm_map_offset_t, entry->vme_end,
9073 vm_prot_t, entry->protection,
9074 vm_prot_t, entry->max_protection,
9075 int, VME_ALIAS(entry));
9076 return FALSE;
9077 }
9078 }
9079 #endif /* !__x86_64__ */
9080 return TRUE;
9081 }
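
/*
 * Illustrative sketch: both overwrite paths below gate on this check
 * right after the plain protection test, e.g.
 *
 *	if (!(entry->protection & VM_PROT_WRITE) ||
 *	    !vm_map_entry_is_overwritable(dst_map, entry)) {
 *		vm_map_unlock(dst_map);
 *		return KERN_PROTECTION_FAILURE;
 *	}
 *
 * (The actual code performs the two tests as separate branches.)
 */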
9082
9083 static kern_return_t
9084 vm_map_overwrite_submap_recurse(
9085 vm_map_t dst_map,
9086 vm_map_offset_t dst_addr,
9087 vm_map_size_t dst_size)
9088 {
9089 vm_map_offset_t dst_end;
9090 vm_map_entry_t tmp_entry;
9091 vm_map_entry_t entry;
9092 kern_return_t result;
9093 boolean_t encountered_sub_map = FALSE;
9094
9095
9096
9097 /*
9098 * Verify that the destination is all writeable
9099 * initially. We have to trunc the destination
9100 * address and round the copy size or we'll end up
9101 * splitting entries in strange ways.
9102 */
9103
9104 dst_end = vm_map_round_page(dst_addr + dst_size,
9105 VM_MAP_PAGE_MASK(dst_map));
9106 vm_map_lock(dst_map);
9107
9108 start_pass_1:
9109 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9110 vm_map_unlock(dst_map);
9111 return KERN_INVALID_ADDRESS;
9112 }
9113
9114 vm_map_clip_start(dst_map,
9115 tmp_entry,
9116 vm_map_trunc_page(dst_addr,
9117 VM_MAP_PAGE_MASK(dst_map)));
9118 if (tmp_entry->is_sub_map) {
9119 /* clipping did unnest if needed */
9120 assert(!tmp_entry->use_pmap);
9121 }
9122
9123 for (entry = tmp_entry;;) {
9124 vm_map_entry_t next;
9125
9126 next = entry->vme_next;
9127 while (entry->is_sub_map) {
9128 vm_map_offset_t sub_start;
9129 vm_map_offset_t sub_end;
9130 vm_map_offset_t local_end;
9131
9132 if (entry->in_transition) {
9133 /*
9134 * Say that we are waiting, and wait for entry.
9135 */
9136 entry->needs_wakeup = TRUE;
9137 vm_map_entry_wait(dst_map, THREAD_UNINT);
9138
9139 goto start_pass_1;
9140 }
9141
9142 encountered_sub_map = TRUE;
9143 sub_start = VME_OFFSET(entry);
9144
9145 if (entry->vme_end < dst_end) {
9146 sub_end = entry->vme_end;
9147 } else {
9148 sub_end = dst_end;
9149 }
9150 sub_end -= entry->vme_start;
9151 sub_end += VME_OFFSET(entry);
9152 local_end = entry->vme_end;
9153 vm_map_unlock(dst_map);
9154
9155 result = vm_map_overwrite_submap_recurse(
9156 VME_SUBMAP(entry),
9157 sub_start,
9158 sub_end - sub_start);
9159
9160 if (result != KERN_SUCCESS) {
9161 return result;
9162 }
9163 if (dst_end <= entry->vme_end) {
9164 return KERN_SUCCESS;
9165 }
9166 vm_map_lock(dst_map);
9167 if (!vm_map_lookup_entry(dst_map, local_end,
9168 &tmp_entry)) {
9169 vm_map_unlock(dst_map);
9170 return KERN_INVALID_ADDRESS;
9171 }
9172 entry = tmp_entry;
9173 next = entry->vme_next;
9174 }
9175
9176 if (!(entry->protection & VM_PROT_WRITE)) {
9177 vm_map_unlock(dst_map);
9178 return KERN_PROTECTION_FAILURE;
9179 }
9180
9181 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
9182 vm_map_unlock(dst_map);
9183 return KERN_PROTECTION_FAILURE;
9184 }
9185
9186 /*
9187 * If the entry is in transition, we must wait
9188 * for it to exit that state. Anything could happen
9189 * when we unlock the map, so start over.
9190 */
9191 if (entry->in_transition) {
9192 /*
9193 * Say that we are waiting, and wait for entry.
9194 */
9195 entry->needs_wakeup = TRUE;
9196 vm_map_entry_wait(dst_map, THREAD_UNINT);
9197
9198 goto start_pass_1;
9199 }
9200
9201 /*
9202 * our range is contained completely within this map entry
9203 */
9204 if (dst_end <= entry->vme_end) {
9205 vm_map_unlock(dst_map);
9206 return KERN_SUCCESS;
9207 }
9208 /*
9209 * check that range specified is contiguous region
9210 */
9211 if ((next == vm_map_to_entry(dst_map)) ||
9212 (next->vme_start != entry->vme_end)) {
9213 vm_map_unlock(dst_map);
9214 return KERN_INVALID_ADDRESS;
9215 }
9216
9217 /*
9218 * Check for permanent objects in the destination.
9219 */
9220 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9221 ((!VME_OBJECT(entry)->internal) ||
9222 (VME_OBJECT(entry)->true_share))) {
9223 if (encountered_sub_map) {
9224 vm_map_unlock(dst_map);
9225 return KERN_FAILURE;
9226 }
9227 }
9228
9229
9230 entry = next;
9231 }/* for */
9232 vm_map_unlock(dst_map);
9233 return KERN_SUCCESS;
9234 }
9235
9236 /*
9237 * Routine: vm_map_copy_overwrite
9238 *
9239 * Description:
9240 * Copy the memory described by the map copy
9241 * object (copy; returned by vm_map_copyin) onto
9242 * the specified destination region (dst_map, dst_addr).
9243 * The destination must be writeable.
9244 *
9245 * Unlike vm_map_copyout, this routine actually
9246 * writes over previously-mapped memory. If the
9247 * previous mapping was to a permanent (user-supplied)
9248 * memory object, it is preserved.
9249 *
9250 * The attributes (protection and inheritance) of the
9251 * destination region are preserved.
9252 *
9253 * If successful, consumes the copy object.
9254 * Otherwise, the caller is responsible for it.
9255 *
9256 * Implementation notes:
9257 * To overwrite aligned temporary virtual memory, it is
9258 * sufficient to remove the previous mapping and insert
9259 * the new copy. This replacement is done either on
9260 * the whole region (if no permanent virtual memory
9261 * objects are embedded in the destination region) or
9262 * in individual map entries.
9263 *
9264 * To overwrite permanent virtual memory, it is necessary
9265 * to copy each page, as the external memory management
9266 * interface currently does not provide any optimizations.
9267 *
9268 * Unaligned memory also has to be copied. It is possible
9269 * to use 'vm_trickery' to copy the aligned data. This is
9270 * not done but not hard to implement.
9271 *
9272 * Once a page of permanent memory has been overwritten,
9273 * it is impossible to interrupt this function; otherwise,
9274 * the call would be neither atomic nor location-independent.
9275 * The kernel-state portion of a user thread must be
9276 * interruptible.
9277 *
9278 * It may be expensive to forward all requests that might
9279 * overwrite permanent memory (vm_write, vm_copy) to
9280 * uninterruptible kernel threads. This routine may be
9281 * called by interruptible threads; however, success is
9282 * not guaranteed -- if the request cannot be performed
9283 * atomically and interruptibly, an error indication is
9284 * returned.
9285 *
9286 * Callers of this function must call vm_map_copy_require on
9287 * previously created vm_map_copy_t or pass a newly created
9288 * one to ensure that it hasn't been forged.
9289 */
9290
9291 static kern_return_t
9292 vm_map_copy_overwrite_nested(
9293 vm_map_t dst_map,
9294 vm_map_address_t dst_addr,
9295 vm_map_copy_t copy,
9296 boolean_t interruptible,
9297 pmap_t pmap,
9298 boolean_t discard_on_success)
9299 {
9300 vm_map_offset_t dst_end;
9301 vm_map_entry_t tmp_entry;
9302 vm_map_entry_t entry;
9303 kern_return_t kr;
9304 boolean_t aligned = TRUE;
9305 boolean_t contains_permanent_objects = FALSE;
9306 boolean_t encountered_sub_map = FALSE;
9307 vm_map_offset_t base_addr;
9308 vm_map_size_t copy_size;
9309 vm_map_size_t total_size;
9310 uint16_t copy_page_shift;
9311
9312 /*
9313 * Check for special kernel buffer allocated
9314 * by new_ipc_kmsg_copyin.
9315 */
9316
9317 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9318 return vm_map_copyout_kernel_buffer(
9319 dst_map, &dst_addr,
9320 copy, copy->size, TRUE, discard_on_success);
9321 }
9322
9323 /*
9324 * Only works for entry lists at the moment. Will
9325 * support page lists later.
9326 */
9327
9328 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9329
9330 if (copy->size == 0) {
9331 if (discard_on_success) {
9332 vm_map_copy_discard(copy);
9333 }
9334 return KERN_SUCCESS;
9335 }
9336
9337 copy_page_shift = copy->cpy_hdr.page_shift;
9338
9339 /*
9340 * Verify that the destination is all writeable
9341 * initially. We have to trunc the destination
9342 * address and round the copy size or we'll end up
9343 * splitting entries in strange ways.
9344 */
9345
9346 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9347 VM_MAP_PAGE_MASK(dst_map)) ||
9348 !VM_MAP_PAGE_ALIGNED(copy->offset,
9349 VM_MAP_PAGE_MASK(dst_map)) ||
9350 !VM_MAP_PAGE_ALIGNED(dst_addr,
9351 VM_MAP_PAGE_MASK(dst_map)) ||
9352 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9353 aligned = FALSE;
9354 dst_end = vm_map_round_page(dst_addr + copy->size,
9355 VM_MAP_PAGE_MASK(dst_map));
9356 } else {
9357 dst_end = dst_addr + copy->size;
9358 }
9359
9360 vm_map_lock(dst_map);
9361
9362 /* LP64todo - remove this check when vm_map_commpage64()
9363 * no longer has to stuff in a map_entry for the commpage
9364 * above the map's max_offset.
9365 */
9366 if (dst_addr >= dst_map->max_offset) {
9367 vm_map_unlock(dst_map);
9368 return KERN_INVALID_ADDRESS;
9369 }
9370
9371 start_pass_1:
9372 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9373 vm_map_unlock(dst_map);
9374 return KERN_INVALID_ADDRESS;
9375 }
9376 vm_map_clip_start(dst_map,
9377 tmp_entry,
9378 vm_map_trunc_page(dst_addr,
9379 VM_MAP_PAGE_MASK(dst_map)));
9380 for (entry = tmp_entry;;) {
9381 vm_map_entry_t next = entry->vme_next;
9382
9383 while (entry->is_sub_map) {
9384 vm_map_offset_t sub_start;
9385 vm_map_offset_t sub_end;
9386 vm_map_offset_t local_end;
9387
9388 if (entry->in_transition) {
9389 /*
9390 * Say that we are waiting, and wait for entry.
9391 */
9392 entry->needs_wakeup = TRUE;
9393 vm_map_entry_wait(dst_map, THREAD_UNINT);
9394
9395 goto start_pass_1;
9396 }
9397
9398 local_end = entry->vme_end;
9399 if (!(entry->needs_copy)) {
9400 /* if needs_copy we are a COW submap */
9401 /* in such a case we just replace so */
9402 /* there is no need for the */
9403 /* following check. */
9404 encountered_sub_map = TRUE;
9405 sub_start = VME_OFFSET(entry);
9406
9407 if (entry->vme_end < dst_end) {
9408 sub_end = entry->vme_end;
9409 } else {
9410 sub_end = dst_end;
9411 }
9412 sub_end -= entry->vme_start;
9413 sub_end += VME_OFFSET(entry);
9414 vm_map_unlock(dst_map);
9415
9416 kr = vm_map_overwrite_submap_recurse(
9417 VME_SUBMAP(entry),
9418 sub_start,
9419 sub_end - sub_start);
9420 if (kr != KERN_SUCCESS) {
9421 return kr;
9422 }
9423 vm_map_lock(dst_map);
9424 }
9425
9426 if (dst_end <= entry->vme_end) {
9427 goto start_overwrite;
9428 }
9429 if (!vm_map_lookup_entry(dst_map, local_end,
9430 &entry)) {
9431 vm_map_unlock(dst_map);
9432 return KERN_INVALID_ADDRESS;
9433 }
9434 next = entry->vme_next;
9435 }
9436
9437 if (!(entry->protection & VM_PROT_WRITE)) {
9438 vm_map_unlock(dst_map);
9439 return KERN_PROTECTION_FAILURE;
9440 }
9441
9442 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
9443 vm_map_unlock(dst_map);
9444 return KERN_PROTECTION_FAILURE;
9445 }
9446
9447 /*
9448 * If the entry is in transition, we must wait
9449 * for it to exit that state. Anything could happen
9450 * when we unlock the map, so start over.
9451 */
9452 if (entry->in_transition) {
9453 /*
9454 * Say that we are waiting, and wait for entry.
9455 */
9456 entry->needs_wakeup = TRUE;
9457 vm_map_entry_wait(dst_map, THREAD_UNINT);
9458
9459 goto start_pass_1;
9460 }
9461
9462 /*
9463 * our range is contained completely within this map entry
9464 */
9465 if (dst_end <= entry->vme_end) {
9466 break;
9467 }
9468 /*
9469 * check that range specified is contiguous region
9470 */
9471 if ((next == vm_map_to_entry(dst_map)) ||
9472 (next->vme_start != entry->vme_end)) {
9473 vm_map_unlock(dst_map);
9474 return KERN_INVALID_ADDRESS;
9475 }
9476
9477
9478 /*
9479 * Check for permanent objects in the destination.
9480 */
9481 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9482 ((!VME_OBJECT(entry)->internal) ||
9483 (VME_OBJECT(entry)->true_share))) {
9484 contains_permanent_objects = TRUE;
9485 }
9486
9487 entry = next;
9488 }/* for */
9489
9490 start_overwrite:
9491 /*
9492 * If there are permanent objects in the destination, then
9493 * the copy cannot be interrupted.
9494 */
9495
9496 if (interruptible && contains_permanent_objects) {
9497 vm_map_unlock(dst_map);
9498 return KERN_FAILURE; /* XXX */
9499 }
9500
9501 /*
9502 * Make a second pass, overwriting the data.
9503 *
9504 * At the beginning of each loop iteration,
9505 * the next entry to be overwritten is "tmp_entry"
9506 * (initially, the value returned from the lookup above),
9507 * and the starting address expected in that entry
9508 * is "base_addr".
9509 */
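/*
 * Rough shape of the pass below (a summary of the existing logic, not
 * additional behavior): walk the destination; when a non-COW submap is
 * hit, split the "copy" object at the matching point, recurse into the
 * submap via vm_map_copy_overwrite_nested() with the front piece, then
 * stitch the remainder back onto "copy" and carry on; runs of ordinary
 * entries are handed in bulk to the aligned/unaligned helpers below.
 */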
9510
9511 total_size = copy->size;
9512 if (encountered_sub_map) {
9513 copy_size = 0;
9514 /* re-calculate tmp_entry since we've had the map */
9515 /* unlocked */
9516 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9517 vm_map_unlock(dst_map);
9518 return KERN_INVALID_ADDRESS;
9519 }
9520 } else {
9521 copy_size = copy->size;
9522 }
9523
9524 base_addr = dst_addr;
9525 while (TRUE) {
9526 /* deconstruct the copy object and do in parts */
9527 /* only in sub_map, interruptible case */
9528 vm_map_entry_t copy_entry;
9529 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9530 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9531 int nentries;
9532 int remaining_entries = 0;
9533 vm_map_offset_t new_offset = 0;
9534
9535 for (entry = tmp_entry; copy_size == 0;) {
9536 vm_map_entry_t next;
9537
9538 next = entry->vme_next;
9539
9540 /* tmp_entry and base address are moved along */
9541 /* each time we encounter a sub-map. Otherwise */
9542 /* entry can outpace tmp_entry, and the copy_size */
9543 /* may reflect the distance between them */
9544 /* if the current entry is found to be in transition */
9545 /* we will start over at the beginning or the last */
9546 /* encounter of a submap as dictated by base_addr */
9547 /* we will zero copy_size accordingly. */
9548 if (entry->in_transition) {
9549 /*
9550 * Say that we are waiting, and wait for entry.
9551 */
9552 entry->needs_wakeup = TRUE;
9553 vm_map_entry_wait(dst_map, THREAD_UNINT);
9554
9555 if (!vm_map_lookup_entry(dst_map, base_addr,
9556 &tmp_entry)) {
9557 vm_map_unlock(dst_map);
9558 return KERN_INVALID_ADDRESS;
9559 }
9560 copy_size = 0;
9561 entry = tmp_entry;
9562 continue;
9563 }
9564 if (entry->is_sub_map) {
9565 vm_map_offset_t sub_start;
9566 vm_map_offset_t sub_end;
9567 vm_map_offset_t local_end;
9568
9569 if (entry->needs_copy) {
9570 /* if this is a COW submap */
9571 /* just back the range with an */
9572 /* anonymous entry */
9573 assert(!entry->vme_permanent);
9574 if (entry->vme_end < dst_end) {
9575 sub_end = entry->vme_end;
9576 } else {
9577 sub_end = dst_end;
9578 }
9579 if (entry->vme_start < base_addr) {
9580 sub_start = base_addr;
9581 } else {
9582 sub_start = entry->vme_start;
9583 }
9584 vm_map_clip_end(
9585 dst_map, entry, sub_end);
9586 vm_map_clip_start(
9587 dst_map, entry, sub_start);
9588 assert(!entry->use_pmap);
9589 assert(!entry->iokit_acct);
9590 entry->use_pmap = TRUE;
9591 vm_map_deallocate(VME_SUBMAP(entry));
9592 assert(!entry->vme_permanent);
9593 VME_OBJECT_SET(entry, VM_OBJECT_NULL, false, 0);
9594 VME_OFFSET_SET(entry, 0);
9595 entry->is_shared = FALSE;
9596 entry->needs_copy = FALSE;
9597 entry->protection = VM_PROT_DEFAULT;
9598 entry->max_protection = VM_PROT_ALL;
9599 entry->wired_count = 0;
9600 entry->user_wired_count = 0;
9601 if (entry->inheritance
9602 == VM_INHERIT_SHARE) {
9603 entry->inheritance = VM_INHERIT_COPY;
9604 }
9605 continue;
9606 }
9607 /* first take care of any non-sub_map */
9608 /* entries to send */
9609 if (base_addr < entry->vme_start) {
9610 /* stuff to send */
9611 copy_size =
9612 entry->vme_start - base_addr;
9613 break;
9614 }
9615 sub_start = VME_OFFSET(entry);
9616
9617 if (entry->vme_end < dst_end) {
9618 sub_end = entry->vme_end;
9619 } else {
9620 sub_end = dst_end;
9621 }
9622 sub_end -= entry->vme_start;
9623 sub_end += VME_OFFSET(entry);
9624 local_end = entry->vme_end;
9625 vm_map_unlock(dst_map);
9626 copy_size = sub_end - sub_start;
9627
9628 /* adjust the copy object */
9629 if (total_size > copy_size) {
9630 vm_map_size_t local_size = 0;
9631 vm_map_size_t entry_size;
9632
9633 nentries = 1;
9634 new_offset = copy->offset;
9635 copy_entry = vm_map_copy_first_entry(copy);
9636 while (copy_entry !=
9637 vm_map_copy_to_entry(copy)) {
9638 entry_size = copy_entry->vme_end -
9639 copy_entry->vme_start;
9640 if ((local_size < copy_size) &&
9641 ((local_size + entry_size)
9642 >= copy_size)) {
9643 vm_map_copy_clip_end(copy,
9644 copy_entry,
9645 copy_entry->vme_start +
9646 (copy_size - local_size));
9647 entry_size = copy_entry->vme_end -
9648 copy_entry->vme_start;
9649 local_size += entry_size;
9650 new_offset += entry_size;
9651 }
9652 if (local_size >= copy_size) {
9653 next_copy = copy_entry->vme_next;
9654 copy_entry->vme_next =
9655 vm_map_copy_to_entry(copy);
9656 previous_prev =
9657 copy->cpy_hdr.links.prev;
9658 copy->cpy_hdr.links.prev = copy_entry;
9659 copy->size = copy_size;
9660 remaining_entries =
9661 copy->cpy_hdr.nentries;
9662 remaining_entries -= nentries;
9663 copy->cpy_hdr.nentries = nentries;
9664 break;
9665 } else {
9666 local_size += entry_size;
9667 new_offset += entry_size;
9668 nentries++;
9669 }
9670 copy_entry = copy_entry->vme_next;
9671 }
9672 }
9673
9674 if ((entry->use_pmap) && (pmap == NULL)) {
9675 kr = vm_map_copy_overwrite_nested(
9676 VME_SUBMAP(entry),
9677 sub_start,
9678 copy,
9679 interruptible,
9680 VME_SUBMAP(entry)->pmap,
9681 TRUE);
9682 } else if (pmap != NULL) {
9683 kr = vm_map_copy_overwrite_nested(
9684 VME_SUBMAP(entry),
9685 sub_start,
9686 copy,
9687 interruptible, pmap,
9688 TRUE);
9689 } else {
9690 kr = vm_map_copy_overwrite_nested(
9691 VME_SUBMAP(entry),
9692 sub_start,
9693 copy,
9694 interruptible,
9695 dst_map->pmap,
9696 TRUE);
9697 }
9698 if (kr != KERN_SUCCESS) {
9699 if (next_copy != NULL) {
9700 copy->cpy_hdr.nentries +=
9701 remaining_entries;
9702 copy->cpy_hdr.links.prev->vme_next =
9703 next_copy;
9704 copy->cpy_hdr.links.prev
9705 = previous_prev;
9706 copy->size = total_size;
9707 }
9708 return kr;
9709 }
9710 if (dst_end <= local_end) {
9711 return KERN_SUCCESS;
9712 }
9713 /* otherwise copy no longer exists, it was */
9714 /* destroyed after successful copy_overwrite */
9715 copy = vm_map_copy_allocate(VM_MAP_COPY_ENTRY_LIST);
9716 copy->offset = new_offset;
9717 copy->cpy_hdr.page_shift = copy_page_shift;
9718
9719 total_size -= copy_size;
9720 copy_size = 0;
9721 /* put back remainder of copy in container */
9722 if (next_copy != NULL) {
9723 copy->cpy_hdr.nentries = remaining_entries;
9724 copy->cpy_hdr.links.next = next_copy;
9725 copy->cpy_hdr.links.prev = previous_prev;
9726 copy->size = total_size;
9727 next_copy->vme_prev =
9728 vm_map_copy_to_entry(copy);
9729 next_copy = NULL;
9730 }
9731 base_addr = local_end;
9732 vm_map_lock(dst_map);
9733 if (!vm_map_lookup_entry(dst_map,
9734 local_end, &tmp_entry)) {
9735 vm_map_unlock(dst_map);
9736 return KERN_INVALID_ADDRESS;
9737 }
9738 entry = tmp_entry;
9739 continue;
9740 }
9741 if (dst_end <= entry->vme_end) {
9742 copy_size = dst_end - base_addr;
9743 break;
9744 }
9745
9746 if ((next == vm_map_to_entry(dst_map)) ||
9747 (next->vme_start != entry->vme_end)) {
9748 vm_map_unlock(dst_map);
9749 return KERN_INVALID_ADDRESS;
9750 }
9751
9752 entry = next;
9753 }/* for */
9754
9755 next_copy = NULL;
9756 nentries = 1;
9757
9758 /* adjust the copy object */
9759 if (total_size > copy_size) {
9760 vm_map_size_t local_size = 0;
9761 vm_map_size_t entry_size;
9762
9763 new_offset = copy->offset;
9764 copy_entry = vm_map_copy_first_entry(copy);
9765 while (copy_entry != vm_map_copy_to_entry(copy)) {
9766 entry_size = copy_entry->vme_end -
9767 copy_entry->vme_start;
9768 if ((local_size < copy_size) &&
9769 ((local_size + entry_size)
9770 >= copy_size)) {
9771 vm_map_copy_clip_end(copy, copy_entry,
9772 copy_entry->vme_start +
9773 (copy_size - local_size));
9774 entry_size = copy_entry->vme_end -
9775 copy_entry->vme_start;
9776 local_size += entry_size;
9777 new_offset += entry_size;
9778 }
9779 if (local_size >= copy_size) {
9780 next_copy = copy_entry->vme_next;
9781 copy_entry->vme_next =
9782 vm_map_copy_to_entry(copy);
9783 previous_prev =
9784 copy->cpy_hdr.links.prev;
9785 copy->cpy_hdr.links.prev = copy_entry;
9786 copy->size = copy_size;
9787 remaining_entries =
9788 copy->cpy_hdr.nentries;
9789 remaining_entries -= nentries;
9790 copy->cpy_hdr.nentries = nentries;
9791 break;
9792 } else {
9793 local_size += entry_size;
9794 new_offset += entry_size;
9795 nentries++;
9796 }
9797 copy_entry = copy_entry->vme_next;
9798 }
9799 }
9800
9801 if (aligned) {
9802 pmap_t local_pmap;
9803
9804 if (pmap) {
9805 local_pmap = pmap;
9806 } else {
9807 local_pmap = dst_map->pmap;
9808 }
9809
9810 if ((kr = vm_map_copy_overwrite_aligned(
9811 dst_map, tmp_entry, copy,
9812 base_addr, local_pmap)) != KERN_SUCCESS) {
9813 if (next_copy != NULL) {
9814 copy->cpy_hdr.nentries +=
9815 remaining_entries;
9816 copy->cpy_hdr.links.prev->vme_next =
9817 next_copy;
9818 copy->cpy_hdr.links.prev =
9819 previous_prev;
9820 copy->size += copy_size;
9821 }
9822 return kr;
9823 }
9824 vm_map_unlock(dst_map);
9825 } else {
9826 /*
9827 * Performance gain:
9828 *
9829 * If the copy and dst address are misaligned but share the same
9830 * offset within the page, we can copy the misaligned parts
9831 * unaligned and copy the rest aligned. If they are
9832 * aligned but len is unaligned, we simply need to copy
9833 * the end bit unaligned. We'll need to split the misaligned
9834 * bits of the region in this case!
9835 */
9836 /* ALWAYS UNLOCKS THE dst_map MAP */
9837 kr = vm_map_copy_overwrite_unaligned(
9838 dst_map,
9839 tmp_entry,
9840 copy,
9841 base_addr,
9842 discard_on_success);
9843 if (kr != KERN_SUCCESS) {
9844 if (next_copy != NULL) {
9845 copy->cpy_hdr.nentries +=
9846 remaining_entries;
9847 copy->cpy_hdr.links.prev->vme_next =
9848 next_copy;
9849 copy->cpy_hdr.links.prev =
9850 previous_prev;
9851 copy->size += copy_size;
9852 }
9853 return kr;
9854 }
9855 }
9856 total_size -= copy_size;
9857 if (total_size == 0) {
9858 break;
9859 }
9860 base_addr += copy_size;
9861 copy_size = 0;
9862 copy->offset = new_offset;
9863 if (next_copy != NULL) {
9864 copy->cpy_hdr.nentries = remaining_entries;
9865 copy->cpy_hdr.links.next = next_copy;
9866 copy->cpy_hdr.links.prev = previous_prev;
9867 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9868 copy->size = total_size;
9869 }
9870 vm_map_lock(dst_map);
9871 while (TRUE) {
9872 if (!vm_map_lookup_entry(dst_map,
9873 base_addr, &tmp_entry)) {
9874 vm_map_unlock(dst_map);
9875 return KERN_INVALID_ADDRESS;
9876 }
9877 if (tmp_entry->in_transition) {
9878 entry->needs_wakeup = TRUE;
9879 vm_map_entry_wait(dst_map, THREAD_UNINT);
9880 } else {
9881 break;
9882 }
9883 }
9884 vm_map_clip_start(dst_map,
9885 tmp_entry,
9886 vm_map_trunc_page(base_addr,
9887 VM_MAP_PAGE_MASK(dst_map)));
9888
9889 entry = tmp_entry;
9890 } /* while */
9891
9892 /*
9893 * Throw away the vm_map_copy object
9894 */
9895 if (discard_on_success) {
9896 vm_map_copy_discard(copy);
9897 }
9898
9899 return KERN_SUCCESS;
9900 }/* vm_map_copy_overwrite */
9901
9902 kern_return_t
9903 vm_map_copy_overwrite(
9904 vm_map_t dst_map,
9905 vm_map_offset_t dst_addr,
9906 vm_map_copy_t copy,
9907 vm_map_size_t copy_size,
9908 boolean_t interruptible)
9909 {
9910 vm_map_size_t head_size, tail_size;
9911 vm_map_copy_t head_copy, tail_copy;
9912 vm_map_offset_t head_addr, tail_addr;
9913 vm_map_entry_t entry;
9914 kern_return_t kr;
9915 vm_map_offset_t effective_page_mask, effective_page_size;
9916 uint16_t copy_page_shift;
9917
9918 head_size = 0;
9919 tail_size = 0;
9920 head_copy = NULL;
9921 tail_copy = NULL;
9922 head_addr = 0;
9923 tail_addr = 0;
9924
9925 /*
9926 * Check for null copy object.
9927 */
9928 if (copy == VM_MAP_COPY_NULL) {
9929 return KERN_SUCCESS;
9930 }
9931
9932 if (__improbable(vm_map_range_overflows(dst_map, dst_addr, copy_size))) {
9933 return KERN_INVALID_ADDRESS;
9934 }
9935
9936 /*
9937 * Assert that the vm_map_copy is coming from the right
9938 * zone and hasn't been forged
9939 */
9940 vm_map_copy_require(copy);
9941
9942 if (interruptible ||
9943 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9944 /*
9945 * We can't split the "copy" map if we're interruptible
9946 * or if we don't have a "copy" map...
9947 */
9948 blunt_copy:
9949 return vm_map_copy_overwrite_nested(dst_map,
9950 dst_addr,
9951 copy,
9952 interruptible,
9953 (pmap_t) NULL,
9954 TRUE);
9955 }
9956
9957 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9958 if (copy_page_shift < PAGE_SHIFT ||
9959 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9960 goto blunt_copy;
9961 }
9962
9963 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9964 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9965 } else {
9966 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9967 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9968 effective_page_mask);
9969 }
9970 effective_page_size = effective_page_mask + 1;
9971
9972 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9973 /*
9974 * Too small to bother with optimizing...
9975 */
9976 goto blunt_copy;
9977 }
9978
9979 if ((dst_addr & effective_page_mask) !=
9980 (copy->offset & effective_page_mask)) {
9981 /*
9982 * Incompatible mis-alignment of source and destination...
9983 */
9984 goto blunt_copy;
9985 }
9986
9987 /*
9988 * Proper alignment or identical mis-alignment at the beginning.
9989 * Let's try and do a small unaligned copy first (if needed)
9990 * and then an aligned copy for the rest.
9991 */
9992 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9993 head_addr = dst_addr;
9994 head_size = (effective_page_size -
9995 (copy->offset & effective_page_mask));
9996 head_size = MIN(head_size, copy_size);
9997 }
9998 if (!vm_map_page_aligned(copy->offset + copy_size,
9999 effective_page_mask)) {
10000 /*
10001 * Mis-alignment at the end.
10002 * Do an aligned copy up to the last page and
10003 * then an unaligned copy for the remaining bytes.
10004 */
10005 tail_size = ((copy->offset + copy_size) &
10006 effective_page_mask);
10007 tail_size = MIN(tail_size, copy_size);
10008 tail_addr = dst_addr + copy_size - tail_size;
10009 assert(tail_addr >= head_addr + head_size);
10010 }
10011 assert(head_size + tail_size <= copy_size);
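/*
 * Illustrative example (assumed numbers, ignoring the minimum-size
 * threshold above): with a 4K effective page size, copy->offset ending
 * in 0x800 and copy_size = 0x3000, head_size is 0x800 (0x1000 - 0x800),
 * tail_size is 0x800 ((offset + size) & 0xFFF), and the remaining
 * 0x2000 bytes in the middle are copied page-aligned. Since dst_addr
 * shares the same low bits as copy->offset, the head copy ends exactly
 * on a destination page boundary.
 */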
10012
10013 if (head_size + tail_size == copy_size) {
10014 /*
10015 * It's all unaligned, no optimization possible...
10016 */
10017 goto blunt_copy;
10018 }
10019
10020 /*
10021 * Can't optimize if there are any submaps in the
10022 * destination due to the way we free the "copy" map
10023 * progressively in vm_map_copy_overwrite_nested()
10024 * in that case.
10025 */
10026 vm_map_lock_read(dst_map);
10027 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
10028 vm_map_unlock_read(dst_map);
10029 goto blunt_copy;
10030 }
10031 for (;
10032 (entry != vm_map_to_entry(dst_map) &&
10033 entry->vme_start < dst_addr + copy_size);
10034 entry = entry->vme_next) {
10035 if (entry->is_sub_map) {
10036 vm_map_unlock_read(dst_map);
10037 goto blunt_copy;
10038 }
10039 }
10040 vm_map_unlock_read(dst_map);
10041
10042 if (head_size) {
10043 /*
10044 * Unaligned copy of the first "head_size" bytes, to reach
10045 * a page boundary.
10046 */
10047
10048 /*
10049 * Extract "head_copy" out of "copy".
10050 */
10051 head_copy = vm_map_copy_allocate(VM_MAP_COPY_ENTRY_LIST);
10052 head_copy->cpy_hdr.entries_pageable =
10053 copy->cpy_hdr.entries_pageable;
10054 head_copy->cpy_hdr.page_shift = copy_page_shift;
10055
10056 entry = vm_map_copy_first_entry(copy);
10057 if (entry->vme_end < copy->offset + head_size) {
10058 head_size = entry->vme_end - copy->offset;
10059 }
10060
10061 head_copy->offset = copy->offset;
10062 head_copy->size = head_size;
10063 copy->offset += head_size;
10064 copy->size -= head_size;
10065 copy_size -= head_size;
10066 assert(copy_size > 0);
10067
10068 vm_map_copy_clip_end(copy, entry, copy->offset);
10069 vm_map_copy_entry_unlink(copy, entry);
10070 vm_map_copy_entry_link(head_copy,
10071 vm_map_copy_to_entry(head_copy),
10072 entry);
10073
10074 /*
10075 * Do the unaligned copy.
10076 */
10077 kr = vm_map_copy_overwrite_nested(dst_map,
10078 head_addr,
10079 head_copy,
10080 interruptible,
10081 (pmap_t) NULL,
10082 FALSE);
10083 if (kr != KERN_SUCCESS) {
10084 goto done;
10085 }
10086 }
10087
10088 if (tail_size) {
10089 /*
10090 * Extract "tail_copy" out of "copy".
10091 */
10092 tail_copy = vm_map_copy_allocate(VM_MAP_COPY_ENTRY_LIST);
10093 tail_copy->cpy_hdr.entries_pageable =
10094 copy->cpy_hdr.entries_pageable;
10095 tail_copy->cpy_hdr.page_shift = copy_page_shift;
10096
10097 tail_copy->offset = copy->offset + copy_size - tail_size;
10098 tail_copy->size = tail_size;
10099
10100 copy->size -= tail_size;
10101 copy_size -= tail_size;
10102 assert(copy_size > 0);
10103
10104 entry = vm_map_copy_last_entry(copy);
10105 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
10106 entry = vm_map_copy_last_entry(copy);
10107 vm_map_copy_entry_unlink(copy, entry);
10108 vm_map_copy_entry_link(tail_copy,
10109 vm_map_copy_last_entry(tail_copy),
10110 entry);
10111 }
10112
10113 /*
10114 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
10115 * we want to avoid TOCTOU issues w.r.t copy->size but
10116 * we don't need to change vm_map_copy_overwrite_nested()
10117 * and all other vm_map_copy_overwrite variants.
10118 *
10119 * So we assign the original copy_size that was passed into
10120 * this routine back to copy.
10121 *
10122 * This use of local 'copy_size' passed into this routine is
10123 * to try and protect against TOCTOU attacks where the kernel
10124 * has been exploited. We don't expect this to be an issue
10125 * during normal system operation.
10126 */
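/*
 * The assertf() catches a mismatch where assertions are compiled in;
 * the assignment below re-establishes copy->size from the caller's
 * copy_size unconditionally, so the protection also holds on kernels
 * built without assertions.
 */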
10127 assertf(copy->size == copy_size,
10128 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
10129 copy->size = copy_size;
10130
10131 /*
10132 * Copy most (or possibly all) of the data.
10133 */
10134 kr = vm_map_copy_overwrite_nested(dst_map,
10135 dst_addr + head_size,
10136 copy,
10137 interruptible,
10138 (pmap_t) NULL,
10139 FALSE);
10140 if (kr != KERN_SUCCESS) {
10141 goto done;
10142 }
10143
10144 if (tail_size) {
10145 kr = vm_map_copy_overwrite_nested(dst_map,
10146 tail_addr,
10147 tail_copy,
10148 interruptible,
10149 (pmap_t) NULL,
10150 FALSE);
10151 }
10152
10153 done:
10154 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
10155 if (kr == KERN_SUCCESS) {
10156 /*
10157 * Discard all the copy maps.
10158 */
10159 if (head_copy) {
10160 vm_map_copy_discard(head_copy);
10161 head_copy = NULL;
10162 }
10163 vm_map_copy_discard(copy);
10164 if (tail_copy) {
10165 vm_map_copy_discard(tail_copy);
10166 tail_copy = NULL;
10167 }
10168 } else {
10169 /*
10170 * Re-assemble the original copy map.
10171 */
10172 if (head_copy) {
10173 entry = vm_map_copy_first_entry(head_copy);
10174 vm_map_copy_entry_unlink(head_copy, entry);
10175 vm_map_copy_entry_link(copy,
10176 vm_map_copy_to_entry(copy),
10177 entry);
10178 copy->offset -= head_size;
10179 copy->size += head_size;
10180 vm_map_copy_discard(head_copy);
10181 head_copy = NULL;
10182 }
10183 if (tail_copy) {
10184 entry = vm_map_copy_last_entry(tail_copy);
10185 vm_map_copy_entry_unlink(tail_copy, entry);
10186 vm_map_copy_entry_link(copy,
10187 vm_map_copy_last_entry(copy),
10188 entry);
10189 copy->size += tail_size;
10190 vm_map_copy_discard(tail_copy);
10191 tail_copy = NULL;
10192 }
10193 }
10194 return kr;
10195 }
10196
10197
10198 /*
10199 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
10200 *
10201 * Description:
10202 * Physically copy unaligned data
10203 *
10204 * Implementation:
10205 * Unaligned parts of pages have to be physically copied. We use
10206 * a modified form of vm_fault_copy (which understands non-aligned
10207 * page offsets and sizes) to do the copy. We attempt to copy as
10208 * much memory in one go as possible; however, vm_fault_copy copies
10209 * within one memory object, so we have to find the smallest of
10210 * "amount left", "source object data size" and "target object data
10211 * size". With unaligned data we don't need to split regions, so the
10212 * source (copy) object should be one map entry; the target range may,
10213 * however, be split over multiple map entries. In any event we are
10214 * pessimistic about these assumptions.
10215 *
10216 * Callers of this function must call vm_map_copy_require on
10217 * previously created vm_map_copy_t or pass a newly created
10218 * one to ensure that it hasn't been forged.
10219 *
10220 * Assumptions:
10221 * dst_map is locked on entry and is returned locked on success,
10222 * unlocked on error.
10223 */
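/*
 * A rough sketch of the per-iteration sizing used below (a restatement
 * of the code, not separate logic):
 *
 *	dst_size  = entry->vme_end - start;
 *	src_size  = copy_entry->vme_end - (copy_entry->vme_start + src_offset);
 *	copy_size = MIN(dst_size, MIN(src_size, amount_left));
 *
 * i.e. each pass copies until it exhausts the current destination entry,
 * the current source copy entry, or the bytes remaining, whichever comes
 * first, and then advances on whichever side ran out.
 */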
10224
10225 static kern_return_t
10226 vm_map_copy_overwrite_unaligned(
10227 vm_map_t dst_map,
10228 vm_map_entry_t entry,
10229 vm_map_copy_t copy,
10230 vm_map_offset_t start,
10231 boolean_t discard_on_success)
10232 {
10233 vm_map_entry_t copy_entry;
10234 vm_map_entry_t copy_entry_next;
10235 vm_map_version_t version;
10236 vm_object_t dst_object;
10237 vm_object_offset_t dst_offset;
10238 vm_object_offset_t src_offset;
10239 vm_object_offset_t entry_offset;
10240 vm_map_offset_t entry_end;
10241 vm_map_size_t src_size,
10242 dst_size,
10243 copy_size,
10244 amount_left;
10245 kern_return_t kr = KERN_SUCCESS;
10246
10247
10248 copy_entry = vm_map_copy_first_entry(copy);
10249
10250 vm_map_lock_write_to_read(dst_map);
10251
10252 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10253 amount_left = copy->size;
10254 /*
10255 * Unaligned, so we never clipped this entry; we need the offset into
10256 * the vm_object, not just the data.
10257 */
10258 while (amount_left > 0) {
10259 if (entry == vm_map_to_entry(dst_map)) {
10260 vm_map_unlock_read(dst_map);
10261 return KERN_INVALID_ADDRESS;
10262 }
10263
10264 /* "start" must be within the current map entry */
10265 assert((start >= entry->vme_start) && (start < entry->vme_end));
10266
10267 /*
10268 * Check protection again
10269 */
10270 if (!(entry->protection & VM_PROT_WRITE)) {
10271 vm_map_unlock_read(dst_map);
10272 return KERN_PROTECTION_FAILURE;
10273 }
10274 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10275 vm_map_unlock_read(dst_map);
10276 return KERN_PROTECTION_FAILURE;
10277 }
10278
10279 dst_offset = start - entry->vme_start;
10280
10281 dst_size = entry->vme_end - start;
10282
10283 src_size = copy_entry->vme_end -
10284 (copy_entry->vme_start + src_offset);
10285
10286 if (dst_size < src_size) {
10287 /*
10288 * we can only copy dst_size bytes before
10289 * we have to get the next destination entry
10290 */
10291 copy_size = dst_size;
10292 } else {
10293 /*
10294 * we can only copy src_size bytes before
10295 * we have to get the next source copy entry
10296 */
10297 copy_size = src_size;
10298 }
10299
10300 if (copy_size > amount_left) {
10301 copy_size = amount_left;
10302 }
10303 /*
10304 * Entry needs copy: create a shadow object for the
10305 * copy-on-write region.
10306 */
10307 if (entry->needs_copy) {
10308 if (vm_map_lock_read_to_write(dst_map)) {
10309 vm_map_lock_read(dst_map);
10310 goto RetryLookup;
10311 }
10312 VME_OBJECT_SHADOW(entry,
10313 (vm_map_size_t)(entry->vme_end
10314 - entry->vme_start),
10315 vm_map_always_shadow(dst_map));
10316 entry->needs_copy = FALSE;
10317 vm_map_lock_write_to_read(dst_map);
10318 }
10319 dst_object = VME_OBJECT(entry);
10320 /*
10321 * Unlike with the virtual (aligned) copy, we're going
10322 * to fault on it; therefore we need a target object.
10323 */
10324 if (dst_object == VM_OBJECT_NULL) {
10325 if (vm_map_lock_read_to_write(dst_map)) {
10326 vm_map_lock_read(dst_map);
10327 goto RetryLookup;
10328 }
10329 dst_object = vm_object_allocate((vm_map_size_t)
10330 entry->vme_end - entry->vme_start);
10331 VME_OBJECT_SET(entry, dst_object, false, 0);
10332 VME_OFFSET_SET(entry, 0);
10333 assert(entry->use_pmap);
10334 vm_map_lock_write_to_read(dst_map);
10335 }
10336 /*
10337 * Take an object reference and unlock map. The "entry" may
10338 * disappear or change when the map is unlocked.
10339 */
10340 vm_object_reference(dst_object);
10341 version.main_timestamp = dst_map->timestamp;
10342 entry_offset = VME_OFFSET(entry);
10343 entry_end = entry->vme_end;
10344 vm_map_unlock_read(dst_map);
10345 /*
10346 * Copy as much as possible in one pass
10347 */
10348 kr = vm_fault_copy(
10349 VME_OBJECT(copy_entry),
10350 VME_OFFSET(copy_entry) + src_offset,
10351 &copy_size,
10352 dst_object,
10353 entry_offset + dst_offset,
10354 dst_map,
10355 &version,
10356 THREAD_UNINT );
10357
10358 start += copy_size;
10359 src_offset += copy_size;
10360 amount_left -= copy_size;
10361 /*
10362 * Release the object reference
10363 */
10364 vm_object_deallocate(dst_object);
10365 /*
10366 * If a hard error occurred, return it now
10367 */
10368 if (kr != KERN_SUCCESS) {
10369 return kr;
10370 }
10371
10372 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10373 || amount_left == 0) {
10374 /*
10375 * all done with this copy entry, dispose.
10376 */
10377 copy_entry_next = copy_entry->vme_next;
10378
10379 if (discard_on_success) {
10380 vm_map_copy_entry_unlink(copy, copy_entry);
10381 assert(!copy_entry->is_sub_map);
10382 vm_object_deallocate(VME_OBJECT(copy_entry));
10383 vm_map_copy_entry_dispose(copy_entry);
10384 }
10385
10386 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10387 amount_left) {
10388 /*
10389 * not finished copying but ran out of source
10390 */
10391 return KERN_INVALID_ADDRESS;
10392 }
10393
10394 copy_entry = copy_entry_next;
10395
10396 src_offset = 0;
10397 }
10398
10399 if (amount_left == 0) {
10400 return KERN_SUCCESS;
10401 }
10402
10403 vm_map_lock_read(dst_map);
10404 if (version.main_timestamp == dst_map->timestamp) {
10405 if (start == entry_end) {
10406 /*
10407 * destination region is split. Use the version
10408 * information to avoid a lookup in the normal
10409 * case.
10410 */
10411 entry = entry->vme_next;
10412 /*
10413 * should be contiguous. Fail if we encounter
10414 * a hole in the destination.
10415 */
10416 if (start != entry->vme_start) {
10417 vm_map_unlock_read(dst_map);
10418 return KERN_INVALID_ADDRESS;
10419 }
10420 }
10421 } else {
10422 /*
10423 * Map version check failed.
10424 * we must lookup the entry because somebody
10425 * might have changed the map behind our backs.
10426 */
10427 RetryLookup:
10428 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10429 vm_map_unlock_read(dst_map);
10430 return KERN_INVALID_ADDRESS;
10431 }
10432 }
10433 }/* while */
10434
10435 return KERN_SUCCESS;
10436 }/* vm_map_copy_overwrite_unaligned */
10437
10438 /*
10439 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10440 *
10441 * Description:
10442 * Does all the vm_trickery possible for whole pages.
10443 *
10444 * Implementation:
10445 *
10446 * If there are no permanent objects in the destination,
10447 * and the source and destination map entry zones match,
10448 * and the destination map entry is not shared,
10449 * then the map entries can be deleted and replaced
10450 * with those from the copy. The following code is the
10451 * basic idea of what to do, but there are lots of annoying
10452 * little details about getting protection and inheritance
10453 * right. Should add protection, inheritance, and sharing checks
10454 * to the above pass and make sure that no wiring is involved.
10455 *
10456 * Callers of this function must call vm_map_copy_require on
10457 * previously created vm_map_copy_t or pass a newly created
10458 * one to ensure that it hasn't been forged.
10459 */
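/*
 * In outline (a simplified restatement of the loop below, omitting the
 * clipping and protection checks):
 *
 *	for each copy entry:
 *		if the destination entry is unshared, temporary anonymous
 *		    memory (or still marked needs_copy):
 *			discard the old object and install the copy
 *			entry's object in its place (virtual copy);
 *		else:
 *			slow_copy: physically copy the pages with
 *			vm_fault_copy() and dispose of the copy entry.
 */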
10460
10461 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10462 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10463 int vm_map_copy_overwrite_aligned_src_large = 0;
10464
10465 static kern_return_t
10466 vm_map_copy_overwrite_aligned(
10467 vm_map_t dst_map,
10468 vm_map_entry_t tmp_entry,
10469 vm_map_copy_t copy,
10470 vm_map_offset_t start,
10471 __unused pmap_t pmap)
10472 {
10473 vm_object_t object;
10474 vm_map_entry_t copy_entry;
10475 vm_map_size_t copy_size;
10476 vm_map_size_t size;
10477 vm_map_entry_t entry;
10478
10479 while ((copy_entry = vm_map_copy_first_entry(copy))
10480 != vm_map_copy_to_entry(copy)) {
10481 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10482
10483 entry = tmp_entry;
10484 if (entry->is_sub_map) {
10485 /* unnested when clipped earlier */
10486 assert(!entry->use_pmap);
10487 }
10488 if (entry == vm_map_to_entry(dst_map)) {
10489 vm_map_unlock(dst_map);
10490 return KERN_INVALID_ADDRESS;
10491 }
10492 size = (entry->vme_end - entry->vme_start);
10493 /*
10494 * Make sure that no holes popped up in the
10495 * address map, and that the protection is
10496 * still valid, in case the map was unlocked
10497 * earlier.
10498 */
10499
10500 if ((entry->vme_start != start) || ((entry->is_sub_map)
10501 && !entry->needs_copy)) {
10502 vm_map_unlock(dst_map);
10503 return KERN_INVALID_ADDRESS;
10504 }
10505 assert(entry != vm_map_to_entry(dst_map));
10506
10507 /*
10508 * Check protection again
10509 */
10510
10511 if (!(entry->protection & VM_PROT_WRITE)) {
10512 vm_map_unlock(dst_map);
10513 return KERN_PROTECTION_FAILURE;
10514 }
10515
10516 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10517 vm_map_unlock(dst_map);
10518 return KERN_PROTECTION_FAILURE;
10519 }
10520
10521 /*
10522 * Adjust to source size first
10523 */
10524
10525 if (copy_size < size) {
10526 if (entry->map_aligned &&
10527 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10528 VM_MAP_PAGE_MASK(dst_map))) {
10529 /* no longer map-aligned */
10530 entry->map_aligned = FALSE;
10531 }
10532 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10533 size = copy_size;
10534 }
10535
10536 /*
10537 * Adjust to destination size
10538 */
10539
10540 if (size < copy_size) {
10541 vm_map_copy_clip_end(copy, copy_entry,
10542 copy_entry->vme_start + size);
10543 copy_size = size;
10544 }
10545
10546 assert((entry->vme_end - entry->vme_start) == size);
10547 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10548 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10549
10550 /*
10551 * If the destination contains temporary unshared memory,
10552 * we can perform the copy by throwing it away and
10553 * installing the source data.
10554 */
10555
10556 object = VME_OBJECT(entry);
10557 if ((!entry->is_shared &&
10558 ((object == VM_OBJECT_NULL) ||
10559 (object->internal && !object->true_share))) ||
10560 entry->needs_copy) {
10561 vm_object_t old_object = VME_OBJECT(entry);
10562 vm_object_offset_t old_offset = VME_OFFSET(entry);
10563 vm_object_offset_t offset;
10564
10565 /*
10566 * Ensure that the source and destination aren't
10567 * identical
10568 */
10569 if (old_object == VME_OBJECT(copy_entry) &&
10570 old_offset == VME_OFFSET(copy_entry)) {
10571 vm_map_copy_entry_unlink(copy, copy_entry);
10572 vm_map_copy_entry_dispose(copy_entry);
10573
10574 if (old_object != VM_OBJECT_NULL) {
10575 vm_object_deallocate(old_object);
10576 }
10577
10578 start = tmp_entry->vme_end;
10579 tmp_entry = tmp_entry->vme_next;
10580 continue;
10581 }
10582
10583 #if XNU_TARGET_OS_OSX
10584 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10585 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10586 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10587 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10588 copy_size <= __TRADEOFF1_COPY_SIZE) {
10589 /*
10590 * Virtual vs. Physical copy tradeoff #1.
10591 *
10592 * Copying only a few pages out of a large
10593 * object: do a physical copy instead of
10594 * a virtual copy, to avoid possibly keeping
10595 * the entire large object alive because of
10596 * those few copy-on-write pages.
10597 */
10598 vm_map_copy_overwrite_aligned_src_large++;
10599 goto slow_copy;
10600 }
10601 #endif /* XNU_TARGET_OS_OSX */
10602
10603 if ((dst_map->pmap != kernel_pmap) &&
10604 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10605 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10606 vm_object_t new_object, new_shadow;
10607
10608 /*
10609 * We're about to map something over a mapping
10610 * established by malloc()...
10611 */
10612 new_object = VME_OBJECT(copy_entry);
10613 if (new_object != VM_OBJECT_NULL) {
10614 vm_object_lock_shared(new_object);
10615 }
10616 while (new_object != VM_OBJECT_NULL &&
10617 #if XNU_TARGET_OS_OSX
10618 !new_object->true_share &&
10619 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10620 #endif /* XNU_TARGET_OS_OSX */
10621 new_object->internal) {
10622 new_shadow = new_object->shadow;
10623 if (new_shadow == VM_OBJECT_NULL) {
10624 break;
10625 }
10626 vm_object_lock_shared(new_shadow);
10627 vm_object_unlock(new_object);
10628 new_object = new_shadow;
10629 }
10630 if (new_object != VM_OBJECT_NULL) {
10631 if (!new_object->internal) {
10632 /*
10633 * The new mapping is backed
10634 * by an external object. We
10635 * don't want malloc'ed memory
10636 * to be replaced with such a
10637 * non-anonymous mapping, so
10638 * let's go off the optimized
10639 * path...
10640 */
10641 vm_map_copy_overwrite_aligned_src_not_internal++;
10642 vm_object_unlock(new_object);
10643 goto slow_copy;
10644 }
10645 #if XNU_TARGET_OS_OSX
10646 if (new_object->true_share ||
10647 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10648 /*
10649 * Same if there's a "true_share"
10650 * object in the shadow chain, or
10651 * an object with a non-default
10652 * (SYMMETRIC) copy strategy.
10653 */
10654 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10655 vm_object_unlock(new_object);
10656 goto slow_copy;
10657 }
10658 #endif /* XNU_TARGET_OS_OSX */
10659 vm_object_unlock(new_object);
10660 }
10661 /*
10662 * The new mapping is still backed by
10663 * anonymous (internal) memory, so it's
10664 * OK to substitute it for the original
10665 * malloc() mapping.
10666 */
10667 }
10668
10669 if (old_object != VM_OBJECT_NULL) {
10670 assert(!entry->vme_permanent);
10671 if (entry->is_sub_map) {
10672 if (entry->use_pmap) {
10673 #ifndef NO_NESTED_PMAP
10674 pmap_unnest(dst_map->pmap,
10675 (addr64_t)entry->vme_start,
10676 entry->vme_end - entry->vme_start);
10677 #endif /* NO_NESTED_PMAP */
10678 if (dst_map->mapped_in_other_pmaps) {
10679 /* clean up parent */
10680 /* map/maps */
10681 vm_map_submap_pmap_clean(
10682 dst_map, entry->vme_start,
10683 entry->vme_end,
10684 VME_SUBMAP(entry),
10685 VME_OFFSET(entry));
10686 }
10687 } else {
10688 vm_map_submap_pmap_clean(
10689 dst_map, entry->vme_start,
10690 entry->vme_end,
10691 VME_SUBMAP(entry),
10692 VME_OFFSET(entry));
10693 }
10694 vm_map_deallocate(VME_SUBMAP(entry));
10695 } else {
10696 if (dst_map->mapped_in_other_pmaps) {
10697 vm_object_pmap_protect_options(
10698 VME_OBJECT(entry),
10699 VME_OFFSET(entry),
10700 entry->vme_end
10701 - entry->vme_start,
10702 PMAP_NULL,
10703 PAGE_SIZE,
10704 entry->vme_start,
10705 VM_PROT_NONE,
10706 PMAP_OPTIONS_REMOVE);
10707 } else {
10708 pmap_remove_options(
10709 dst_map->pmap,
10710 (addr64_t)(entry->vme_start),
10711 (addr64_t)(entry->vme_end),
10712 PMAP_OPTIONS_REMOVE);
10713 }
10714 vm_object_deallocate(old_object);
10715 }
10716 }
10717
10718 if (entry->iokit_acct) {
10719 /* keep using iokit accounting */
10720 entry->use_pmap = FALSE;
10721 } else {
10722 /* use pmap accounting */
10723 entry->use_pmap = TRUE;
10724 }
10725 assert(!entry->vme_permanent);
10726 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry), false, 0);
10727 object = VME_OBJECT(entry);
10728 entry->needs_copy = copy_entry->needs_copy;
10729 entry->wired_count = 0;
10730 entry->user_wired_count = 0;
10731 offset = VME_OFFSET(copy_entry);
10732 VME_OFFSET_SET(entry, offset);
10733
10734 vm_map_copy_entry_unlink(copy, copy_entry);
10735 vm_map_copy_entry_dispose(copy_entry);
10736
10737 /*
10738 * We could try to push pages into the pmap at this point, BUT
10739 * that optimization only saved on average 2 us per page if ALL
10740 * the pages in the source were currently mapped
10741 * and ALL the pages in the dest were touched; if fewer than
10742 * 2/3 of the pages were touched, it actually cost more cycles.
10743 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10744 */
10745
10746 /*
10747 * Set up for the next iteration. The map
10748 * has not been unlocked, so the next
10749 * address should be at the end of this
10750 * entry, and the next map entry should be
10751 * the one following it.
10752 */
10753
10754 start = tmp_entry->vme_end;
10755 tmp_entry = tmp_entry->vme_next;
10756 } else {
10757 vm_map_version_t version;
10758 vm_object_t dst_object;
10759 vm_object_offset_t dst_offset;
10760 kern_return_t r;
10761
10762 slow_copy:
10763 if (entry->needs_copy) {
10764 VME_OBJECT_SHADOW(entry,
10765 (entry->vme_end -
10766 entry->vme_start),
10767 vm_map_always_shadow(dst_map));
10768 entry->needs_copy = FALSE;
10769 }
10770
10771 dst_object = VME_OBJECT(entry);
10772 dst_offset = VME_OFFSET(entry);
10773
10774 /*
10775 * Take an object reference, and record
10776 * the map version information so that the
10777 * map can be safely unlocked.
10778 */
10779
10780 if (dst_object == VM_OBJECT_NULL) {
10781 /*
10782 * We would usually have just taken the
10783 * optimized path above if the destination
10784 * object has not been allocated yet. But we
10785 * now disable that optimization if the copy
10786 * entry's object is not backed by anonymous
10787 * memory to avoid replacing malloc'ed
10788 * (i.e. re-usable) anonymous memory with a
10789 * not-so-anonymous mapping.
10790 * So we have to handle this case here and
10791 * allocate a new VM object for this map entry.
10792 */
10793 dst_object = vm_object_allocate(
10794 entry->vme_end - entry->vme_start);
10795 dst_offset = 0;
10796 VME_OBJECT_SET(entry, dst_object, false, 0);
10797 VME_OFFSET_SET(entry, dst_offset);
10798 assert(entry->use_pmap);
10799 }
10800
10801 vm_object_reference(dst_object);
10802
10803 /* account for unlock bumping up timestamp */
10804 version.main_timestamp = dst_map->timestamp + 1;
10805
10806 vm_map_unlock(dst_map);
10807
10808 /*
10809 * Copy as much as possible in one pass
10810 */
10811
10812 copy_size = size;
10813 r = vm_fault_copy(
10814 VME_OBJECT(copy_entry),
10815 VME_OFFSET(copy_entry),
10816 &copy_size,
10817 dst_object,
10818 dst_offset,
10819 dst_map,
10820 &version,
10821 THREAD_UNINT );
10822
10823 /*
10824 * Release the object reference
10825 */
10826
10827 vm_object_deallocate(dst_object);
10828
10829 /*
10830 * If a hard error occurred, return it now
10831 */
10832
10833 if (r != KERN_SUCCESS) {
10834 return r;
10835 }
10836
10837 if (copy_size != 0) {
10838 /*
10839 * Dispose of the copied region
10840 */
10841
10842 vm_map_copy_clip_end(copy, copy_entry,
10843 copy_entry->vme_start + copy_size);
10844 vm_map_copy_entry_unlink(copy, copy_entry);
10845 vm_object_deallocate(VME_OBJECT(copy_entry));
10846 vm_map_copy_entry_dispose(copy_entry);
10847 }
10848
10849 /*
10850 * Pick up in the destination map where we left off.
10851 *
10852 * Use the version information to avoid a lookup
10853 * in the normal case.
10854 */
10855
10856 start += copy_size;
10857 vm_map_lock(dst_map);
10858 if (version.main_timestamp == dst_map->timestamp &&
10859 copy_size != 0) {
10860 /* We can safely use saved tmp_entry value */
10861
10862 if (tmp_entry->map_aligned &&
10863 !VM_MAP_PAGE_ALIGNED(
10864 start,
10865 VM_MAP_PAGE_MASK(dst_map))) {
10866 /* no longer map-aligned */
10867 tmp_entry->map_aligned = FALSE;
10868 }
10869 vm_map_clip_end(dst_map, tmp_entry, start);
10870 tmp_entry = tmp_entry->vme_next;
10871 } else {
10872 /* Must do lookup of tmp_entry */
10873
10874 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10875 vm_map_unlock(dst_map);
10876 return KERN_INVALID_ADDRESS;
10877 }
10878 if (tmp_entry->map_aligned &&
10879 !VM_MAP_PAGE_ALIGNED(
10880 start,
10881 VM_MAP_PAGE_MASK(dst_map))) {
10882 /* no longer map-aligned */
10883 tmp_entry->map_aligned = FALSE;
10884 }
10885 vm_map_clip_start(dst_map, tmp_entry, start);
10886 }
10887 }
10888 }/* while */
10889
10890 return KERN_SUCCESS;
10891 }/* vm_map_copy_overwrite_aligned */
10892
10893 /*
10894 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10895 *
10896 * Description:
10897 * Copy in data to a kernel buffer from space in the
10898 * source map. The original space may be optionally
10899 * deallocated.
10900 *
10901 * If successful, returns a new copy object.
10902 */
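/*
 * Note: this path only handles requests of at most msg_ool_size_small
 * bytes; the resulting copy object is of type VM_MAP_COPY_KERNEL_BUFFER,
 * with the data held in a kalloc'ed buffer (cpy_kdata) rather than in
 * map entries.
 */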
10903 static kern_return_t
10904 vm_map_copyin_kernel_buffer(
10905 vm_map_t src_map,
10906 vm_map_offset_t src_addr,
10907 vm_map_size_t len,
10908 boolean_t src_destroy,
10909 vm_map_copy_t *copy_result)
10910 {
10911 kern_return_t kr;
10912 vm_map_copy_t copy;
10913 void *kdata;
10914
10915 if (len > msg_ool_size_small) {
10916 return KERN_INVALID_ARGUMENT;
10917 }
10918
10919 kdata = kalloc_data(len, Z_WAITOK);
10920 if (kdata == NULL) {
10921 return KERN_RESOURCE_SHORTAGE;
10922 }
10923 kr = copyinmap(src_map, src_addr, kdata, (vm_size_t)len);
10924 if (kr != KERN_SUCCESS) {
10925 kfree_data(kdata, len);
10926 return kr;
10927 }
10928
10929 copy = vm_map_copy_allocate(VM_MAP_COPY_KERNEL_BUFFER);
10930 copy->cpy_kdata = kdata;
10931 copy->size = len;
10932 copy->offset = 0;
10933
10934 if (src_destroy) {
10935 vmr_flags_t flags = VM_MAP_REMOVE_INTERRUPTIBLE;
10936
10937 if (src_map == kernel_map) {
10938 flags |= VM_MAP_REMOVE_KUNWIRE;
10939 }
10940
10941 (void)vm_map_remove_guard(src_map,
10942 vm_map_trunc_page(src_addr, VM_MAP_PAGE_MASK(src_map)),
10943 vm_map_round_page(src_addr + len, VM_MAP_PAGE_MASK(src_map)),
10944 flags, KMEM_GUARD_NONE);
10945 }
10946
10947 *copy_result = copy;
10948 return KERN_SUCCESS;
10949 }
10950
10951 /*
10952 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10953 *
10954 * Description:
10955 * Copy out data from a kernel buffer into space in the
10956 * destination map. The space may optionally be dynamically
10957 * allocated.
10958 *
10959 * If successful, consumes the copy object.
10960 * Otherwise, the caller is responsible for it.
10961 *
10962 * Callers of this function must call vm_map_copy_require on
10963 * previously created vm_map_copy_t or pass a newly created
10964 * one to ensure that it hasn't been forged.
10965 */
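/*
 * When "overwrite" is FALSE, fresh space is allocated in the target map
 * with vm_map_enter() and returned in *addr; when TRUE, the caller has
 * already provided the destination in *addr and the data is copied out
 * directly over it.
 */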
10966 static int vm_map_copyout_kernel_buffer_failures = 0;
10967 static kern_return_t
10968 vm_map_copyout_kernel_buffer(
10969 vm_map_t map,
10970 vm_map_address_t *addr, /* IN/OUT */
10971 vm_map_copy_t copy,
10972 vm_map_size_t copy_size,
10973 boolean_t overwrite,
10974 boolean_t consume_on_success)
10975 {
10976 kern_return_t kr = KERN_SUCCESS;
10977 thread_t thread = current_thread();
10978
10979 assert(copy->size == copy_size);
10980
10981 /*
10982 * check for corrupted vm_map_copy structure
10983 */
10984 if (copy_size > msg_ool_size_small || copy->offset) {
10985 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10986 (long long)copy->size, (long long)copy->offset);
10987 }
10988
10989 if (!overwrite) {
10990 /*
10991 * Allocate space in the target map for the data
10992 */
10993 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
10994
10995 if (map == kernel_map) {
10996 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
10997 }
10998
10999 *addr = 0;
11000 kr = vm_map_enter(map,
11001 addr,
11002 vm_map_round_page(copy_size,
11003 VM_MAP_PAGE_MASK(map)),
11004 (vm_map_offset_t) 0,
11005 vmk_flags,
11006 VM_OBJECT_NULL,
11007 (vm_object_offset_t) 0,
11008 FALSE,
11009 VM_PROT_DEFAULT,
11010 VM_PROT_ALL,
11011 VM_INHERIT_DEFAULT);
11012 if (kr != KERN_SUCCESS) {
11013 return kr;
11014 }
11015 #if KASAN
11016 if (map->pmap == kernel_pmap) {
11017 kasan_notify_address(*addr, copy->size);
11018 }
11019 #endif
11020 }
11021
11022 /*
11023 * Copyout the data from the kernel buffer to the target map.
11024 */
11025 if (thread->map == map) {
11026 /*
11027 * If the target map is the current map, just do
11028 * the copy.
11029 */
11030 assert((vm_size_t)copy_size == copy_size);
11031 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
11032 kr = KERN_INVALID_ADDRESS;
11033 }
11034 } else {
11035 vm_map_t oldmap;
11036
11037 /*
11038 * If the target map is another map, assume the
11039 * target's address space identity for the duration
11040 * of the copy.
11041 */
11042 vm_map_reference(map);
11043 oldmap = vm_map_switch(map);
11044
11045 assert((vm_size_t)copy_size == copy_size);
11046 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
11047 vm_map_copyout_kernel_buffer_failures++;
11048 kr = KERN_INVALID_ADDRESS;
11049 }
11050
11051 (void) vm_map_switch(oldmap);
11052 vm_map_deallocate(map);
11053 }
11054
11055 if (kr != KERN_SUCCESS) {
11056 /* the copy failed, clean up */
11057 if (!overwrite) {
11058 /*
11059 * Deallocate the space we allocated in the target map.
11060 */
11061 (void) vm_map_remove(map,
11062 vm_map_trunc_page(*addr,
11063 VM_MAP_PAGE_MASK(map)),
11064 vm_map_round_page((*addr +
11065 vm_map_round_page(copy_size,
11066 VM_MAP_PAGE_MASK(map))),
11067 VM_MAP_PAGE_MASK(map)));
11068 *addr = 0;
11069 }
11070 } else {
11071 /* copy was successful, discard the copy structure */
11072 if (consume_on_success) {
11073 kfree_data(copy->cpy_kdata, copy_size);
11074 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
11075 }
11076 }
11077
11078 return kr;
11079 }
11080
11081 /*
11082 * Routine: vm_map_copy_insert [internal use only]
11083 *
11084 * Description:
11085 * Link a copy chain ("copy") into a map at the
11086 * specified location (after "where").
11087 *
11088 * Callers of this function must call vm_map_copy_require on
11089 * previously created vm_map_copy_t or pass a newly created
11090 * one to ensure that it hasn't been forged.
11091 * Side effects:
11092 * The copy chain is destroyed.
11093 */
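/*
 * The entries are moved (not duplicated) from the copy chain into the
 * map, so no new object references are taken; the emptied copy header
 * is then freed.
 */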
11094 static void
11095 vm_map_copy_insert(
11096 vm_map_t map,
11097 vm_map_entry_t after_where,
11098 vm_map_copy_t copy)
11099 {
11100 vm_map_entry_t entry;
11101
11102 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
11103 entry = vm_map_copy_first_entry(copy);
11104 vm_map_copy_entry_unlink(copy, entry);
11105 vm_map_store_entry_link(map, after_where, entry,
11106 VM_MAP_KERNEL_FLAGS_NONE);
11107 after_where = entry;
11108 }
11109 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
11110 }
11111
11112 /*
11113 * Callers of this function must call vm_map_copy_require on
11114 * previously created vm_map_copy_t or pass a newly created
11115 * one to ensure that it hasn't been forged.
11116 */
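/*
 * Unlike vm_map_copy_insert(), this duplicates the copy entries into
 * newly created map entries and takes an extra reference on each backing
 * object or submap, leaving the "copy" object itself intact for the
 * caller.
 */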
11117 void
11118 vm_map_copy_remap(
11119 vm_map_t map,
11120 vm_map_entry_t where,
11121 vm_map_copy_t copy,
11122 vm_map_offset_t adjustment,
11123 vm_prot_t cur_prot,
11124 vm_prot_t max_prot,
11125 vm_inherit_t inheritance)
11126 {
11127 vm_map_entry_t copy_entry, new_entry;
11128
11129 for (copy_entry = vm_map_copy_first_entry(copy);
11130 copy_entry != vm_map_copy_to_entry(copy);
11131 copy_entry = copy_entry->vme_next) {
11132 /* get a new VM map entry for the map */
11133 new_entry = vm_map_entry_create(map);
11134 /* copy the "copy entry" to the new entry */
11135 vm_map_entry_copy(map, new_entry, copy_entry);
11136 /* adjust "start" and "end" */
11137 new_entry->vme_start += adjustment;
11138 new_entry->vme_end += adjustment;
11139 /* clear some attributes */
11140 new_entry->inheritance = inheritance;
11141 new_entry->protection = cur_prot;
11142 new_entry->max_protection = max_prot;
11143 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
11144 /* take an extra reference on the entry's "object" */
11145 if (new_entry->is_sub_map) {
11146 assert(!new_entry->use_pmap); /* not nested */
11147 vm_map_reference(VME_SUBMAP(new_entry));
11148 } else {
11149 vm_object_reference(VME_OBJECT(new_entry));
11150 }
11151 /* insert the new entry in the map */
11152 vm_map_store_entry_link(map, where, new_entry,
11153 VM_MAP_KERNEL_FLAGS_NONE);
11154 /* continue inserting the "copy entries" after the new entry */
11155 where = new_entry;
11156 }
11157 }
11158
11159
11160 /*
11161 * Returns true if *size matches (or is in the range of) copy->size.
11162 * Upon returning true, the *size field is updated with the actual size of the
11163 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
11164 */
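/*
 * Example (illustrative numbers only): for a VM_MAP_COPY_ENTRY_LIST copy
 * and a 16K-page dst_map, a caller-supplied *size of 0x4100 is accepted
 * for any copy->size in the range [0x4100, 0x8000], and *size is then
 * updated to the copy's actual size.
 */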
11165 boolean_t
11166 vm_map_copy_validate_size(
11167 vm_map_t dst_map,
11168 vm_map_copy_t copy,
11169 vm_map_size_t *size)
11170 {
11171 if (copy == VM_MAP_COPY_NULL) {
11172 return FALSE;
11173 }
11174
11175 /*
11176 * Assert that the vm_map_copy is coming from the right
11177 * zone and hasn't been forged
11178 */
11179 vm_map_copy_require(copy);
11180
11181 vm_map_size_t copy_sz = copy->size;
11182 vm_map_size_t sz = *size;
11183 switch (copy->type) {
11184 case VM_MAP_COPY_KERNEL_BUFFER:
11185 if (sz == copy_sz) {
11186 return TRUE;
11187 }
11188 break;
11189 case VM_MAP_COPY_ENTRY_LIST:
11190 /*
11191 * potential page-size rounding prevents us from exactly
11192 * validating this flavor of vm_map_copy, but we can at least
11193 * assert that it's within a range.
11194 */
11195 if (copy_sz >= sz &&
11196 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
11197 *size = copy_sz;
11198 return TRUE;
11199 }
11200 break;
11201 default:
11202 break;
11203 }
11204 return FALSE;
11205 }
11206
11207 /*
11208 * Routine: vm_map_copyout_size
11209 *
11210 * Description:
11211 * Copy out a copy chain ("copy") into newly-allocated
11212 * space in the destination map. Uses a prevalidated
11213 * size for the copy object (vm_map_copy_validate_size).
11214 *
11215 * If successful, consumes the copy object.
11216 * Otherwise, the caller is responsible for it.
11217 */
11218 kern_return_t
11219 vm_map_copyout_size(
11220 vm_map_t dst_map,
11221 vm_map_address_t *dst_addr, /* OUT */
11222 vm_map_copy_t copy,
11223 vm_map_size_t copy_size)
11224 {
11225 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
11226 TRUE, /* consume_on_success */
11227 VM_PROT_DEFAULT,
11228 VM_PROT_ALL,
11229 VM_INHERIT_DEFAULT);
11230 }
11231
11232 /*
11233 * Routine: vm_map_copyout
11234 *
11235 * Description:
11236 * Copy out a copy chain ("copy") into newly-allocated
11237 * space in the destination map.
11238 *
11239 * If successful, consumes the copy object.
11240 * Otherwise, the caller is responsible for it.
11241 */
11242 kern_return_t
11243 vm_map_copyout(
11244 vm_map_t dst_map,
11245 vm_map_address_t *dst_addr, /* OUT */
11246 vm_map_copy_t copy)
11247 {
11248 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
11249 TRUE, /* consume_on_success */
11250 VM_PROT_DEFAULT,
11251 VM_PROT_ALL,
11252 VM_INHERIT_DEFAULT);
11253 }
11254
11255 kern_return_t
11256 vm_map_copyout_internal(
11257 vm_map_t dst_map,
11258 vm_map_address_t *dst_addr, /* OUT */
11259 vm_map_copy_t copy,
11260 vm_map_size_t copy_size,
11261 boolean_t consume_on_success,
11262 vm_prot_t cur_protection,
11263 vm_prot_t max_protection,
11264 vm_inherit_t inheritance)
11265 {
11266 vm_map_size_t size;
11267 vm_map_size_t adjustment;
11268 vm_map_offset_t start;
11269 vm_object_offset_t vm_copy_start;
11270 vm_map_entry_t last;
11271 vm_map_entry_t entry;
11272 vm_map_copy_t original_copy;
11273 kern_return_t kr;
11274 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
11275
11276 /*
11277 * Check for null copy object.
11278 */
11279
11280 if (copy == VM_MAP_COPY_NULL) {
11281 *dst_addr = 0;
11282 return KERN_SUCCESS;
11283 }
11284
11285 /*
11286 * Assert that the vm_map_copy is coming from the right
11287 * zone and hasn't been forged
11288 */
11289 vm_map_copy_require(copy);
11290
11291 if (copy->size != copy_size) {
11292 *dst_addr = 0;
11293 return KERN_FAILURE;
11294 }
11295
11296 /*
11297 * Check for special kernel buffer allocated
11298 * by new_ipc_kmsg_copyin.
11299 */
11300
11301 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11302 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11303 copy, copy_size, FALSE,
11304 consume_on_success);
11305 }
11306
11307 original_copy = copy;
11308 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11309 vm_map_copy_t target_copy;
11310 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11311
11312 target_copy = VM_MAP_COPY_NULL;
11313 DEBUG4K_ADJUST("adjusting...\n");
11314 kr = vm_map_copy_adjust_to_target(
11315 copy,
11316 0, /* offset */
11317 copy->size, /* size */
11318 dst_map,
11319 TRUE, /* copy */
11320 &target_copy,
11321 &overmap_start,
11322 &overmap_end,
11323 &trimmed_start);
11324 if (kr != KERN_SUCCESS) {
11325 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11326 return kr;
11327 }
11328 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11329 if (target_copy != copy) {
11330 copy = target_copy;
11331 }
11332 copy_size = copy->size;
11333 }
11334
11335 /*
11336 * Find space for the data
11337 */
11338
11339 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11340 VM_MAP_COPY_PAGE_MASK(copy));
11341 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11342 VM_MAP_COPY_PAGE_MASK(copy))
11343 - vm_copy_start;
11344
11345 vm_map_kernel_flags_update_range_id(&vmk_flags, dst_map);
11346
11347 vm_map_lock(dst_map);
11348 kr = vm_map_locate_space(dst_map, size, 0, vmk_flags,
11349 &start, &last);
11350 if (kr != KERN_SUCCESS) {
11351 vm_map_unlock(dst_map);
11352 return kr;
11353 }
11354
11355 adjustment = start - vm_copy_start;
11356 if (!consume_on_success) {
11357 /*
11358 * We're not allowed to consume "copy", so we'll have to
11359 * copy its map entries into the destination map below.
11360 * No need to re-allocate map entries from the correct
11361 * (pageable or not) zone, since we'll get new map entries
11362 * during the transfer.
11363 * We'll also adjust the map entries's "start" and "end"
11364 * during the transfer, to keep "copy"'s entries consistent
11365 * with its "offset".
11366 */
11367 goto after_adjustments;
11368 }
11369
11370 /*
11371 * Since we're going to just drop the map
11372 * entries from the copy into the destination
11373 * map, they must come from the same pool.
11374 */
11375
11376 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11377 /*
11378 * Mismatches occur when dealing with the default
11379 * pager.
11380 */
11381 vm_map_entry_t next, new;
11382
11383 /*
11384 * Find the zone that the copies were allocated from
11385 */
11386
11387 entry = vm_map_copy_first_entry(copy);
11388
11389 /*
11390 * Reinitialize the copy so that vm_map_copy_entry_link
11391 * will work.
11392 */
11393 vm_map_store_copy_reset(copy, entry);
11394 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11395
11396 /*
11397 * Copy each entry.
11398 */
11399 while (entry != vm_map_copy_to_entry(copy)) {
11400 new = vm_map_copy_entry_create(copy);
11401 vm_map_entry_copy_full(new, entry);
11402 new->vme_no_copy_on_read = FALSE;
11403 assert(!new->iokit_acct);
11404 if (new->is_sub_map) {
11405 /* clr address space specifics */
11406 new->use_pmap = FALSE;
11407 }
11408 vm_map_copy_entry_link(copy,
11409 vm_map_copy_last_entry(copy),
11410 new);
11411 next = entry->vme_next;
11412 vm_map_entry_dispose(entry);
11413 entry = next;
11414 }
11415 }
11416
11417 /*
11418 * Adjust the addresses in the copy chain, and
11419 * reset the region attributes.
11420 */
11421
11422 for (entry = vm_map_copy_first_entry(copy);
11423 entry != vm_map_copy_to_entry(copy);
11424 entry = entry->vme_next) {
11425 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11426 /*
11427 * We're injecting this copy entry into a map that
11428 * has the standard page alignment, so clear
11429 * "map_aligned" (which might have been inherited
11430 * from the original map entry).
11431 */
11432 entry->map_aligned = FALSE;
11433 }
11434
11435 entry->vme_start += adjustment;
11436 entry->vme_end += adjustment;
11437
11438 if (entry->map_aligned) {
11439 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11440 VM_MAP_PAGE_MASK(dst_map)));
11441 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11442 VM_MAP_PAGE_MASK(dst_map)));
11443 }
11444
11445 entry->inheritance = VM_INHERIT_DEFAULT;
11446 entry->protection = VM_PROT_DEFAULT;
11447 entry->max_protection = VM_PROT_ALL;
11448 entry->behavior = VM_BEHAVIOR_DEFAULT;
11449
11450 /*
11451 * If the entry is now wired,
11452 * map the pages into the destination map.
11453 */
11454 if (entry->wired_count != 0) {
11455 vm_map_offset_t va;
11456 vm_object_offset_t offset;
11457 vm_object_t object;
11458 vm_prot_t prot;
11459 int type_of_fault;
11460
11461 /* TODO4K would need to use actual page size */
11462 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11463
11464 object = VME_OBJECT(entry);
11465 offset = VME_OFFSET(entry);
11466 va = entry->vme_start;
11467
11468 pmap_pageable(dst_map->pmap,
11469 entry->vme_start,
11470 entry->vme_end,
11471 TRUE);
11472
11473 while (va < entry->vme_end) {
11474 vm_page_t m;
11475 struct vm_object_fault_info fault_info = {};
11476
11477 /*
11478 * Look up the page in the object.
11479 * Assert that the page will be found in the
11480 * top object:
11481 * either
11482 * the object was newly created by
11483 * vm_object_copy_slowly, and has
11484 * copies of all of the pages from
11485 * the source object
11486 * or
11487 * the object was moved from the old
11488 * map entry; because the old map
11489 * entry was wired, all of the pages
11490 * were in the top-level object.
11491 * (XXX not true if we wire pages for
11492 * reading)
11493 */
11494 vm_object_lock(object);
11495
11496 m = vm_page_lookup(object, offset);
11497 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11498 m->vmp_absent) {
11499 panic("vm_map_copyout: wiring %p", m);
11500 }
11501
11502 prot = entry->protection;
11503
11504 if (override_nx(dst_map, VME_ALIAS(entry)) &&
11505 prot) {
11506 prot |= VM_PROT_EXECUTE;
11507 }
11508
11509 type_of_fault = DBG_CACHE_HIT_FAULT;
11510
11511 fault_info.user_tag = VME_ALIAS(entry);
11512 fault_info.pmap_options = 0;
11513 if (entry->iokit_acct ||
11514 (!entry->is_sub_map && !entry->use_pmap)) {
11515 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11516 }
11517 if (entry->vme_xnu_user_debug &&
11518 !VM_PAGE_OBJECT(m)->code_signed) {
11519 /*
11520 * Modified code-signed executable
11521 * region: this page does not belong
11522 * to a code-signed VM object, so it
11523 * must have been copied and should
11524 * therefore be typed XNU_USER_DEBUG
11525 * rather than XNU_USER_EXEC.
11526 */
11527 fault_info.pmap_options |= PMAP_OPTIONS_XNU_USER_DEBUG;
11528 }
11529
11530 vm_fault_enter(m,
11531 dst_map->pmap,
11532 va,
11533 PAGE_SIZE, 0,
11534 prot,
11535 prot,
11536 VM_PAGE_WIRED(m),
11537 FALSE, /* change_wiring */
11538 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11539 &fault_info,
11540 NULL, /* need_retry */
11541 &type_of_fault);
11542
11543 vm_object_unlock(object);
11544
11545 offset += PAGE_SIZE_64;
11546 va += PAGE_SIZE;
11547 }
11548 }
11549 }
11550
11551 after_adjustments:
11552
11553 /*
11554 * Correct the page alignment for the result
11555 */
11556
11557 *dst_addr = start + (copy->offset - vm_copy_start);
11558
11559 #if KASAN
11560 kasan_notify_address(*dst_addr, size);
11561 #endif
11562
11563 /*
11564 * Update the hints and the map size
11565 */
11566
11567 if (consume_on_success) {
11568 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11569 } else {
11570 SAVE_HINT_MAP_WRITE(dst_map, last);
11571 }
11572
11573 dst_map->size += size;
11574
11575 /*
11576 * Link in the copy
11577 */
11578
11579 if (consume_on_success) {
11580 vm_map_copy_insert(dst_map, last, copy);
11581 if (copy != original_copy) {
11582 vm_map_copy_discard(original_copy);
11583 original_copy = VM_MAP_COPY_NULL;
11584 }
11585 } else {
11586 vm_map_copy_remap(dst_map, last, copy, adjustment,
11587 cur_protection, max_protection,
11588 inheritance);
11589 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11590 vm_map_copy_discard(copy);
11591 copy = original_copy;
11592 }
11593 }
11594
11595
11596 vm_map_unlock(dst_map);
11597
11598 /*
11599 * XXX If wiring_required, call vm_map_pageable
11600 */
11601
11602 return KERN_SUCCESS;
11603 }
11604
11605 /*
11606 * Routine: vm_map_copyin
11607 *
11608 * Description:
11609 * see vm_map_copyin_common. Exported via Unsupported.exports.
11610 *
11611 */
11612
11613 #undef vm_map_copyin
11614
11615 kern_return_t
11616 vm_map_copyin(
11617 vm_map_t src_map,
11618 vm_map_address_t src_addr,
11619 vm_map_size_t len,
11620 boolean_t src_destroy,
11621 vm_map_copy_t *copy_result) /* OUT */
11622 {
11623 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11624 FALSE, copy_result, FALSE);
11625 }
11626
11627 /*
11628 * Routine: vm_map_copyin_common
11629 *
11630 * Description:
11631 * Copy the specified region (src_addr, len) from the
11632 * source address space (src_map), possibly removing
11633 * the region from the source address space (src_destroy).
11634 *
11635 * Returns:
11636 * A vm_map_copy_t object (copy_result), suitable for
11637 * insertion into another address space (using vm_map_copyout),
11638 * copying over another address space region (using
11639 * vm_map_copy_overwrite). If the copy is unused, it
11640 * should be destroyed (using vm_map_copy_discard).
11641 *
11642 * In/out conditions:
11643 * The source map should not be locked on entry.
11644 */
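/*
 * Minimal usage sketch (illustrative only): a kernel client with a
 * caller-provided "src_map", "src_addr", "len" and "dst_map" might
 * drive the copyin/copyout pair roughly as follows, discarding the
 * copy object if the copyout fails:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);
 *		}
 *	}
 */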
11645
11646 typedef struct submap_map {
11647 vm_map_t parent_map;
11648 vm_map_offset_t base_start;
11649 vm_map_offset_t base_end;
11650 vm_map_size_t base_len;
11651 struct submap_map *next;
11652 } submap_map_t;
11653
11654 kern_return_t
11655 vm_map_copyin_common(
11656 vm_map_t src_map,
11657 vm_map_address_t src_addr,
11658 vm_map_size_t len,
11659 boolean_t src_destroy,
11660 __unused boolean_t src_volatile,
11661 vm_map_copy_t *copy_result, /* OUT */
11662 boolean_t use_maxprot)
11663 {
11664 int flags;
11665
11666 flags = 0;
11667 if (src_destroy) {
11668 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11669 }
11670 if (use_maxprot) {
11671 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11672 }
11673 return vm_map_copyin_internal(src_map,
11674 src_addr,
11675 len,
11676 flags,
11677 copy_result);
11678 }
11679 kern_return_t
11680 vm_map_copyin_internal(
11681 vm_map_t src_map,
11682 vm_map_address_t src_addr,
11683 vm_map_size_t len,
11684 int flags,
11685 vm_map_copy_t *copy_result) /* OUT */
11686 {
11687 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11688 * in multi-level lookup, this
11689 * entry contains the actual
11690 * vm_object/offset.
11691 */
11692 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11693
11694 vm_map_offset_t src_start; /* Start of current entry --
11695 * where copy is taking place now
11696 */
11697 vm_map_offset_t src_end; /* End of entire region to be
11698 * copied */
11699 vm_map_offset_t src_base;
11700 vm_map_t base_map = src_map;
11701 boolean_t map_share = FALSE;
11702 submap_map_t *parent_maps = NULL;
11703
11704 vm_map_copy_t copy; /* Resulting copy */
11705 vm_map_address_t copy_addr;
11706 vm_map_size_t copy_size;
11707 boolean_t src_destroy;
11708 boolean_t use_maxprot;
11709 boolean_t preserve_purgeable;
11710 boolean_t entry_was_shared;
11711 vm_map_entry_t saved_src_entry;
11712
11713 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11714 return KERN_INVALID_ARGUMENT;
11715 }
11716
11717 #if CONFIG_KERNEL_TBI
11718 if (src_map->pmap == kernel_pmap) {
11719 src_addr = VM_KERNEL_TBI_FILL(src_addr);
11720 }
11721 #endif /* CONFIG_KERNEL_TBI */
11722
11723 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11724 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11725 preserve_purgeable =
11726 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11727
11728 /*
11729 * Check for copies of zero bytes.
11730 */
11731
11732 if (len == 0) {
11733 *copy_result = VM_MAP_COPY_NULL;
11734 return KERN_SUCCESS;
11735 }
11736
11737 /*
11738 * Check that the end address doesn't overflow
11739 */
11740 if (__improbable(vm_map_range_overflows(src_map, src_addr, len))) {
11741 return KERN_INVALID_ADDRESS;
11742 }
11743 src_end = src_addr + len;
11744 if (src_end < src_addr) {
11745 return KERN_INVALID_ADDRESS;
11746 }
11747
11748 /*
11749 * Compute (page aligned) start and end of region
11750 */
11751 src_start = vm_map_trunc_page(src_addr,
11752 VM_MAP_PAGE_MASK(src_map));
11753 src_end = vm_map_round_page(src_end,
11754 VM_MAP_PAGE_MASK(src_map));
11755 if (src_end < src_addr) {
11756 return KERN_INVALID_ADDRESS;
11757 }
11758
11759 /*
11760 * If the copy is sufficiently small, use a kernel buffer instead
11761 * of making a virtual copy. The theory being that the cost of
11762 * setting up VM (and taking C-O-W faults) dominates the copy costs
11763 * for small regions.
11764 */
11765 if ((len <= msg_ool_size_small) &&
11766 !use_maxprot &&
11767 !preserve_purgeable &&
11768 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11769 /*
11770 * Since the "msg_ool_size_small" threshold was increased and
11771 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11772 * address space limits, we revert to doing a virtual copy if the
11773 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11774 * of the commpage would now fail when it used to work.
11775 */
11776 (src_start >= vm_map_min(src_map) &&
11777 src_start < vm_map_max(src_map) &&
11778 src_end >= vm_map_min(src_map) &&
11779 src_end < vm_map_max(src_map))) {
11780 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11781 src_destroy, copy_result);
11782 }
11783
11784 /*
11785 * Allocate a header element for the list.
11786 *
11787 * Use the start and end in the header to
11788 * remember the endpoints prior to rounding.
11789 */
11790
11791 copy = vm_map_copy_allocate(VM_MAP_COPY_ENTRY_LIST);
11792 copy->cpy_hdr.entries_pageable = TRUE;
11793 copy->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(src_map);
11794 copy->offset = src_addr;
11795 copy->size = len;
11796
11797 new_entry = vm_map_copy_entry_create(copy);
11798
11799 #define RETURN(x) \
11800 MACRO_BEGIN \
11801 vm_map_unlock(src_map); \
11802 if(src_map != base_map) \
11803 vm_map_deallocate(src_map); \
11804 if (new_entry != VM_MAP_ENTRY_NULL) \
11805 vm_map_copy_entry_dispose(new_entry); \
11806 vm_map_copy_discard(copy); \
11807 { \
11808 submap_map_t *_ptr; \
11809 \
11810 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11811 parent_maps=parent_maps->next; \
11812 if (_ptr->parent_map != base_map) \
11813 vm_map_deallocate(_ptr->parent_map); \
11814 kfree_type(submap_map_t, _ptr); \
11815 } \
11816 } \
11817 MACRO_RETURN(x); \
11818 MACRO_END
11819
11820 /*
11821 * Find the beginning of the region.
11822 */
11823
11824 vm_map_lock(src_map);
11825
11826 /*
11827 * Lookup the original "src_addr" rather than the truncated
11828 * "src_start", in case "src_start" falls in a non-map-aligned
11829 * map entry *before* the map entry that contains "src_addr"...
11830 */
11831 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11832 RETURN(KERN_INVALID_ADDRESS);
11833 }
11834 if (!tmp_entry->is_sub_map) {
11835 /*
11836 * ... but clip to the map-rounded "src_start" rather than
11837 * "src_addr" to preserve map-alignment. We'll adjust the
11838 * first copy entry at the end, if needed.
11839 */
11840 vm_map_clip_start(src_map, tmp_entry, src_start);
11841 }
11842 if (src_start < tmp_entry->vme_start) {
11843 /*
11844 * Move "src_start" up to the start of the
11845 * first map entry to copy.
11846 */
11847 src_start = tmp_entry->vme_start;
11848 }
11849 /* set for later submap fix-up */
11850 copy_addr = src_start;
11851
11852 /*
11853 * Go through entries until we get to the end.
11854 */
11855
11856 while (TRUE) {
11857 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11858 vm_map_size_t src_size; /* Size of source
11859 * map entry (in both
11860 * maps)
11861 */
11862
11863 vm_object_t src_object; /* Object to copy */
11864 vm_object_offset_t src_offset;
11865
11866 vm_object_t new_copy_object;/* vm_object_copy_* result */
11867
11868 boolean_t src_needs_copy; /* Should source map
11869 * be made read-only
11870 * for copy-on-write?
11871 */
11872
11873 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11874
11875 boolean_t was_wired; /* Was source wired? */
11876 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
11877 #if __arm64e__
11878 boolean_t saved_used_for_tpro; /* Saved used_for_tpro */
11879 #endif
11880 vm_map_version_t version; /* Version before locks
11881 * dropped to make copy
11882 */
11883 kern_return_t result; /* Return value from
11884 * copy_strategically.
11885 */
11886 while (tmp_entry->is_sub_map) {
11887 vm_map_size_t submap_len;
11888 submap_map_t *ptr;
11889
11890 ptr = kalloc_type(submap_map_t, Z_WAITOK);
11891 ptr->next = parent_maps;
11892 parent_maps = ptr;
11893 ptr->parent_map = src_map;
11894 ptr->base_start = src_start;
11895 ptr->base_end = src_end;
11896 submap_len = tmp_entry->vme_end - src_start;
11897 if (submap_len > (src_end - src_start)) {
11898 submap_len = src_end - src_start;
11899 }
11900 ptr->base_len = submap_len;
11901
11902 src_start -= tmp_entry->vme_start;
11903 src_start += VME_OFFSET(tmp_entry);
11904 src_end = src_start + submap_len;
11905 src_map = VME_SUBMAP(tmp_entry);
11906 vm_map_lock(src_map);
11907 /* keep an outstanding reference for all maps in */
11908 /* the parents tree except the base map */
11909 vm_map_reference(src_map);
11910 vm_map_unlock(ptr->parent_map);
11911 if (!vm_map_lookup_entry(
11912 src_map, src_start, &tmp_entry)) {
11913 RETURN(KERN_INVALID_ADDRESS);
11914 }
11915 map_share = TRUE;
11916 if (!tmp_entry->is_sub_map) {
11917 vm_map_clip_start(src_map, tmp_entry, src_start);
11918 }
11919 src_entry = tmp_entry;
11920 }
11921 /* we are now in the lowest level submap... */
11922
11923 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11924 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11925 /* This is not supported for now. In the future */
11926 /* we will need to detect the phys_contig */
11927 /* condition and then upgrade copy_slowly */
11928 /* to do a physical copy from the device- */
11929 /* memory-based object. We can piggy-back */
11930 /* off of the "was_wired" boolean to set up */
11931 /* the proper handling. */
11932 RETURN(KERN_PROTECTION_FAILURE);
11933 }
11934 /*
11935 * Create a new address map entry to hold the result.
11936 * Fill in the fields from the appropriate source entries.
11937 * We must unlock the source map to do this if we need
11938 * to allocate a map entry.
11939 */
11940 if (new_entry == VM_MAP_ENTRY_NULL) {
11941 version.main_timestamp = src_map->timestamp;
11942 vm_map_unlock(src_map);
11943
11944 new_entry = vm_map_copy_entry_create(copy);
11945
11946 vm_map_lock(src_map);
11947 if ((version.main_timestamp + 1) != src_map->timestamp) {
11948 if (!vm_map_lookup_entry(src_map, src_start,
11949 &tmp_entry)) {
11950 RETURN(KERN_INVALID_ADDRESS);
11951 }
11952 if (!tmp_entry->is_sub_map) {
11953 vm_map_clip_start(src_map, tmp_entry, src_start);
11954 }
11955 continue; /* restart w/ new tmp_entry */
11956 }
11957 }
11958
11959 /*
11960 * Verify that the region can be read.
11961 */
11962 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11963 !use_maxprot) ||
11964 (src_entry->max_protection & VM_PROT_READ) == 0) {
11965 RETURN(KERN_PROTECTION_FAILURE);
11966 }
11967
11968 /*
11969 * Clip against the endpoints of the entire region.
11970 */
11971
11972 vm_map_clip_end(src_map, src_entry, src_end);
11973
11974 src_size = src_entry->vme_end - src_start;
11975 src_object = VME_OBJECT(src_entry);
11976 src_offset = VME_OFFSET(src_entry);
11977 was_wired = (src_entry->wired_count != 0);
11978
11979 vm_map_entry_copy(src_map, new_entry, src_entry);
11980 if (new_entry->is_sub_map) {
11981 /* clr address space specifics */
11982 new_entry->use_pmap = FALSE;
11983 } else {
11984 /*
11985 * We're dealing with a copy-on-write operation,
11986 * so the resulting mapping should not inherit the
11987 * original mapping's accounting settings.
11988 * "iokit_acct" should have been cleared in
11989 * vm_map_entry_copy().
11990 * "use_pmap" should be reset to its default (TRUE)
11991 * so that the new mapping gets accounted for in
11992 * the task's memory footprint.
11993 */
11994 assert(!new_entry->iokit_acct);
11995 new_entry->use_pmap = TRUE;
11996 }
11997
11998 /*
11999 * Attempt non-blocking copy-on-write optimizations.
12000 */
12001
12002 /*
12003 * If we are destroying the source, and the object
12004 * is internal, we could move the object reference
12005 * from the source to the copy. The copy is
12006 * copy-on-write only if the source is.
12007 * We make another reference to the object, because
12008 * destroying the source entry will deallocate it.
12009 *
12010 * This memory transfer has to be atomic, (to prevent
12011 * the VM object from being shared or copied while
12012 * it's being moved here), so we could only do this
12013 * if we won't have to unlock the VM map until the
12014 * original mapping has been fully removed.
12015 */
12016
12017 RestartCopy:
12018 if ((src_object == VM_OBJECT_NULL ||
12019 (!was_wired && !map_share && !tmp_entry->is_shared
12020 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
12021 vm_object_copy_quickly(
12022 VME_OBJECT(new_entry),
12023 src_offset,
12024 src_size,
12025 &src_needs_copy,
12026 &new_entry_needs_copy)) {
12027 new_entry->needs_copy = new_entry_needs_copy;
12028
12029 /*
12030 * Handle copy-on-write obligations
12031 */
12032
12033 if (src_needs_copy && !tmp_entry->needs_copy) {
12034 vm_prot_t prot;
12035
12036 prot = src_entry->protection & ~VM_PROT_WRITE;
12037
12038 if (override_nx(src_map, VME_ALIAS(src_entry))
12039 && prot) {
12040 prot |= VM_PROT_EXECUTE;
12041 }
12042
12043 vm_object_pmap_protect(
12044 src_object,
12045 src_offset,
12046 src_size,
12047 (src_entry->is_shared ?
12048 PMAP_NULL
12049 : src_map->pmap),
12050 VM_MAP_PAGE_SIZE(src_map),
12051 src_entry->vme_start,
12052 prot);
12053
12054 assert(tmp_entry->wired_count == 0);
12055 tmp_entry->needs_copy = TRUE;
12056 }
12057
12058 /*
12059 * The map has never been unlocked, so it's safe
12060 * to move to the next entry rather than doing
12061 * another lookup.
12062 */
12063
12064 goto CopySuccessful;
12065 }
12066
12067 entry_was_shared = tmp_entry->is_shared;
12068
12069 /*
12070 * Take an object reference, so that we may
12071 * release the map lock(s).
12072 */
12073
12074 assert(src_object != VM_OBJECT_NULL);
12075 vm_object_reference(src_object);
12076
12077 /*
12078 * Record the timestamp for later verification.
12079 * Unlock the map.
12080 */
12081
12082 version.main_timestamp = src_map->timestamp;
12083 vm_map_unlock(src_map); /* Increments timestamp once! */
12084 saved_src_entry = src_entry;
12085 tmp_entry = VM_MAP_ENTRY_NULL;
12086 src_entry = VM_MAP_ENTRY_NULL;
12087
12088 /*
12089 * Perform the copy
12090 */
12091
12092 if (was_wired ||
12093 (debug4k_no_cow_copyin &&
12094 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
12095 CopySlowly:
12096 vm_object_lock(src_object);
12097 result = vm_object_copy_slowly(
12098 src_object,
12099 src_offset,
12100 src_size,
12101 THREAD_UNINT,
12102 &new_copy_object);
12103 /* VME_OBJECT_SET will reset used_for_jit|tpro, so preserve them. */
12104 saved_used_for_jit = new_entry->used_for_jit;
12105 #if __arm64e__
12106 saved_used_for_tpro = new_entry->used_for_tpro;
12107 #endif
12108 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
12109 new_entry->used_for_jit = saved_used_for_jit;
12110 #if __arm64e__
12111 new_entry->used_for_tpro = saved_used_for_tpro;
12112 #endif
12113 VME_OFFSET_SET(new_entry,
12114 src_offset - vm_object_trunc_page(src_offset));
12115 new_entry->needs_copy = FALSE;
12116 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12117 (entry_was_shared || map_share)) {
12118 vm_object_t new_object;
12119
12120 vm_object_lock_shared(src_object);
12121 new_object = vm_object_copy_delayed(
12122 src_object,
12123 src_offset,
12124 src_size,
12125 TRUE);
12126 if (new_object == VM_OBJECT_NULL) {
12127 goto CopySlowly;
12128 }
12129
12130 VME_OBJECT_SET(new_entry, new_object, false, 0);
12131 assert(new_entry->wired_count == 0);
12132 new_entry->needs_copy = TRUE;
12133 assert(!new_entry->iokit_acct);
12134 assert(new_object->purgable == VM_PURGABLE_DENY);
12135 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
12136 result = KERN_SUCCESS;
12137 } else {
12138 vm_object_offset_t new_offset;
12139 new_offset = VME_OFFSET(new_entry);
12140 result = vm_object_copy_strategically(src_object,
12141 src_offset,
12142 src_size,
12143 &new_copy_object,
12144 &new_offset,
12145 &new_entry_needs_copy);
12146 /* VME_OBJECT_SET will reset used_for_jit|tpro, so preserve them. */
12147 saved_used_for_jit = new_entry->used_for_jit;
12148 #if __arm64e__
12149 saved_used_for_tpro = new_entry->used_for_tpro;
12150 #endif
12151 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
12152 new_entry->used_for_jit = saved_used_for_jit;
12153 #if __arm64e__
12154 new_entry->used_for_tpro = saved_used_for_tpro;
12155 #endif
12156 if (new_offset != VME_OFFSET(new_entry)) {
12157 VME_OFFSET_SET(new_entry, new_offset);
12158 }
12159
12160 new_entry->needs_copy = new_entry_needs_copy;
12161 }
12162
12163 if (result == KERN_SUCCESS &&
12164 ((preserve_purgeable &&
12165 src_object->purgable != VM_PURGABLE_DENY) ||
12166 new_entry->used_for_jit
12167 #if __arm64e__
12168 || new_entry->used_for_tpro
12169 #endif
12170 )) {
12171 /*
12172 * Purgeable objects should be COPY_NONE, true share;
12173 * this should be propagated to the copy.
12174 *
12175 * Also force mappings the pmap specially protects to
12176 * be COPY_NONE; trying to COW these mappings would
12177 * change the effective protections, which could have
12178 * side effects if the pmap layer relies on the
12179 * specified protections.
12180 */
12181
12182 vm_object_t new_object;
12183
12184 new_object = VME_OBJECT(new_entry);
12185 assert(new_object != src_object);
12186 vm_object_lock(new_object);
12187 assert(new_object->ref_count == 1);
12188 assert(new_object->shadow == VM_OBJECT_NULL);
12189 assert(new_object->copy == VM_OBJECT_NULL);
12190 assert(new_object->vo_owner == NULL);
12191
12192 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12193
12194 if (preserve_purgeable &&
12195 src_object->purgable != VM_PURGABLE_DENY) {
12196 new_object->true_share = TRUE;
12197
12198 /* start as non-volatile with no owner... */
12199 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12200 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12201 /* ... and move to src_object's purgeable state */
12202 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12203 int state;
12204 state = src_object->purgable;
12205 vm_object_purgable_control(
12206 new_object,
12207 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12208 &state);
12209 }
12210 /* no pmap accounting for purgeable objects */
12211 new_entry->use_pmap = FALSE;
12212 }
12213
12214 vm_object_unlock(new_object);
12215 new_object = VM_OBJECT_NULL;
12216 }
12217
12218 if (result != KERN_SUCCESS &&
12219 result != KERN_MEMORY_RESTART_COPY) {
12220 vm_map_lock(src_map);
12221 RETURN(result);
12222 }
12223
12224 /*
12225 * Throw away the extra reference
12226 */
12227
12228 vm_object_deallocate(src_object);
12229
12230 /*
12231 * Verify that the map has not substantially
12232 * changed while the copy was being made.
12233 */
12234
12235 vm_map_lock(src_map);
12236
12237 if ((version.main_timestamp + 1) == src_map->timestamp) {
12238 /* src_map hasn't changed: src_entry is still valid */
12239 src_entry = saved_src_entry;
12240 goto VerificationSuccessful;
12241 }
12242
12243 /*
12244 * Simple version comparison failed.
12245 *
12246 * Retry the lookup and verify that the
12247 * same object/offset are still present.
12248 *
12249 * [Note: a memory manager that colludes with
12250 * the calling task can detect that we have
12251 * cheated. While the map was unlocked, the
12252 * mapping could have been changed and restored.]
12253 */
12254
12255 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12256 if (result != KERN_MEMORY_RESTART_COPY) {
12257 vm_object_deallocate(VME_OBJECT(new_entry));
12258 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL, false, 0);
12259 /* reset accounting state */
12260 new_entry->iokit_acct = FALSE;
12261 new_entry->use_pmap = TRUE;
12262 }
12263 RETURN(KERN_INVALID_ADDRESS);
12264 }
12265
12266 src_entry = tmp_entry;
12267 vm_map_clip_start(src_map, src_entry, src_start);
12268
12269 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12270 !use_maxprot) ||
12271 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12272 goto VerificationFailed;
12273 }
12274
12275 if (src_entry->vme_end < new_entry->vme_end) {
12276 /*
12277 * This entry might have been shortened
12278 * (vm_map_clip_end) or been replaced with
12279 * an entry that ends closer to "src_start"
12280 * than before.
12281 * Adjust "new_entry" accordingly; copying
12282 * less memory would be correct but we also
12283 * redo the copy (see below) if the new entry
12284 * no longer points at the same object/offset.
12285 */
12286 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12287 VM_MAP_COPY_PAGE_MASK(copy)));
12288 new_entry->vme_end = src_entry->vme_end;
12289 src_size = new_entry->vme_end - src_start;
12290 } else if (src_entry->vme_end > new_entry->vme_end) {
12291 /*
12292 * This entry might have been extended
12293 * (vm_map_entry_simplify() or coalesce)
12294 * or been replaced with an entry that ends farther
12295 * from "src_start" than before.
12296 *
12297 * We've called vm_object_copy_*() only on
12298 * the previous <start:end> range, so we can't
12299 * just extend new_entry. We have to re-do
12300 * the copy based on the new entry as if it was
12301 * pointing at a different object/offset (see
12302 * "Verification failed" below).
12303 */
12304 }
12305
12306 if ((VME_OBJECT(src_entry) != src_object) ||
12307 (VME_OFFSET(src_entry) != src_offset) ||
12308 (src_entry->vme_end > new_entry->vme_end)) {
12309 /*
12310 * Verification failed.
12311 *
12312 * Start over with this top-level entry.
12313 */
12314
12315 VerificationFailed: ;
12316
12317 vm_object_deallocate(VME_OBJECT(new_entry));
12318 tmp_entry = src_entry;
12319 continue;
12320 }
12321
12322 /*
12323 * Verification succeeded.
12324 */
12325
12326 VerificationSuccessful:;
12327
12328 if (result == KERN_MEMORY_RESTART_COPY) {
12329 goto RestartCopy;
12330 }
12331
12332 /*
12333 * Copy succeeded.
12334 */
12335
12336 CopySuccessful: ;
12337
12338 /*
12339 * Link in the new copy entry.
12340 */
12341
12342 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12343 new_entry);
12344
12345 /*
12346 * Determine whether the entire region
12347 * has been copied.
12348 */
12349 src_base = src_start;
12350 src_start = new_entry->vme_end;
12351 new_entry = VM_MAP_ENTRY_NULL;
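/* finished the current (sub)map range: pop back out to the parent map(s), fixing up each submap on the way */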
12352 while ((src_start >= src_end) && (src_end != 0)) {
12353 submap_map_t *ptr;
12354
12355 if (src_map == base_map) {
12356 /* back to the top */
12357 break;
12358 }
12359
12360 ptr = parent_maps;
12361 assert(ptr != NULL);
12362 parent_maps = parent_maps->next;
12363
12364 /* fix up the damage we did in that submap */
12365 vm_map_simplify_range(src_map,
12366 src_base,
12367 src_end);
12368
12369 vm_map_unlock(src_map);
12370 vm_map_deallocate(src_map);
12371 vm_map_lock(ptr->parent_map);
12372 src_map = ptr->parent_map;
12373 src_base = ptr->base_start;
12374 src_start = ptr->base_start + ptr->base_len;
12375 src_end = ptr->base_end;
12376 if (!vm_map_lookup_entry(src_map,
12377 src_start,
12378 &tmp_entry) &&
12379 (src_end > src_start)) {
12380 RETURN(KERN_INVALID_ADDRESS);
12381 }
12382 kfree_type(submap_map_t, ptr);
12383 if (parent_maps == NULL) {
12384 map_share = FALSE;
12385 }
12386 src_entry = tmp_entry->vme_prev;
12387 }
12388
12389 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12390 (src_start >= src_addr + len) &&
12391 (src_addr + len != 0)) {
12392 /*
12393 * Stop copying now, even though we haven't reached
12394 * "src_end". We'll adjust the end of the last copy
12395 * entry at the end, if needed.
12396 *
12397 * If src_map's alignment is different from the
12398 * system's page-alignment, there could be
12399 * extra non-map-aligned map entries between
12400 * the original (non-rounded) "src_addr + len"
12401 * and the rounded "src_end".
12402 * We do not want to copy those map entries since
12403 * they're not part of the copied range.
12404 */
12405 break;
12406 }
12407
12408 if ((src_start >= src_end) && (src_end != 0)) {
12409 break;
12410 }
12411
12412 /*
12413 * Verify that there are no gaps in the region
12414 */
12415
12416 tmp_entry = src_entry->vme_next;
12417 if ((tmp_entry->vme_start != src_start) ||
12418 (tmp_entry == vm_map_to_entry(src_map))) {
12419 RETURN(KERN_INVALID_ADDRESS);
12420 }
12421 }
12422
12423 /*
12424 * If the source should be destroyed, do it now, since the
12425 * copy was successful.
12426 */
12427 if (src_destroy) {
12428 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_FLAGS;
12429
12430 if (src_map == kernel_map) {
12431 remove_flags |= VM_MAP_REMOVE_KUNWIRE;
12432 }
12433 (void)vm_map_remove_and_unlock(src_map,
12434 vm_map_trunc_page(src_addr, VM_MAP_PAGE_MASK(src_map)),
12435 src_end,
12436 remove_flags,
12437 KMEM_GUARD_NONE);
12438 } else {
12439 /* fix up the damage we did in the base map */
12440 vm_map_simplify_range(
12441 src_map,
12442 vm_map_trunc_page(src_addr,
12443 VM_MAP_PAGE_MASK(src_map)),
12444 vm_map_round_page(src_end,
12445 VM_MAP_PAGE_MASK(src_map)));
12446 vm_map_unlock(src_map);
12447 }
12448
12449 tmp_entry = VM_MAP_ENTRY_NULL;
12450
12451 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12452 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12453 vm_map_offset_t original_start, original_offset, original_end;
12454
12455 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12456
12457 /* adjust alignment of first copy_entry's "vme_start" */
12458 tmp_entry = vm_map_copy_first_entry(copy);
12459 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12460 vm_map_offset_t adjustment;
12461
12462 original_start = tmp_entry->vme_start;
12463 original_offset = VME_OFFSET(tmp_entry);
12464
12465 /* map-align the start of the first copy entry... */
12466 adjustment = (tmp_entry->vme_start -
12467 vm_map_trunc_page(
12468 tmp_entry->vme_start,
12469 VM_MAP_PAGE_MASK(src_map)));
12470 tmp_entry->vme_start -= adjustment;
12471 VME_OFFSET_SET(tmp_entry,
12472 VME_OFFSET(tmp_entry) - adjustment);
12473 copy_addr -= adjustment;
12474 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12475 /* ... adjust for mis-aligned start of copy range */
12476 adjustment =
12477 (vm_map_trunc_page(copy->offset,
12478 PAGE_MASK) -
12479 vm_map_trunc_page(copy->offset,
12480 VM_MAP_PAGE_MASK(src_map)));
12481 if (adjustment) {
12482 assert(page_aligned(adjustment));
12483 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12484 tmp_entry->vme_start += adjustment;
12485 VME_OFFSET_SET(tmp_entry,
12486 (VME_OFFSET(tmp_entry) +
12487 adjustment));
12488 copy_addr += adjustment;
12489 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12490 }
12491
12492 /*
12493 * Assert that the adjustments haven't exposed
12494 * more than was originally copied...
12495 */
12496 assert(tmp_entry->vme_start >= original_start);
12497 assert(VME_OFFSET(tmp_entry) >= original_offset);
12498 /*
12499 * ... and that it did not adjust outside of a
12500 * single 16K page.
12501 */
12502 assert(vm_map_trunc_page(tmp_entry->vme_start,
12503 VM_MAP_PAGE_MASK(src_map)) ==
12504 vm_map_trunc_page(original_start,
12505 VM_MAP_PAGE_MASK(src_map)));
12506 }
12507
12508 /* adjust alignment of last copy_entry's "vme_end" */
12509 tmp_entry = vm_map_copy_last_entry(copy);
12510 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12511 vm_map_offset_t adjustment;
12512
12513 original_end = tmp_entry->vme_end;
12514
12515 /* map-align the end of the last copy entry... */
12516 tmp_entry->vme_end =
12517 vm_map_round_page(tmp_entry->vme_end,
12518 VM_MAP_PAGE_MASK(src_map));
12519 /* ... adjust for mis-aligned end of copy range */
12520 adjustment =
12521 (vm_map_round_page((copy->offset +
12522 copy->size),
12523 VM_MAP_PAGE_MASK(src_map)) -
12524 vm_map_round_page((copy->offset +
12525 copy->size),
12526 PAGE_MASK));
12527 if (adjustment) {
12528 assert(page_aligned(adjustment));
12529 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12530 tmp_entry->vme_end -= adjustment;
12531 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12532 }
12533
12534 /*
12535 * Assert that the adjustments haven't exposed
12536 * more than was originally copied...
12537 */
12538 assert(tmp_entry->vme_end <= original_end);
12539 /*
12540 * ... and that it did not adjust outside of a
12541 * single 16K page.
12542 */
12543 assert(vm_map_round_page(tmp_entry->vme_end,
12544 VM_MAP_PAGE_MASK(src_map)) ==
12545 vm_map_round_page(original_end,
12546 VM_MAP_PAGE_MASK(src_map)));
12547 }
12548 }
12549
12550 /* Fix-up start and end points in copy. This is necessary */
12551 /* when the various entries in the copy object were picked */
12552 /* up from different sub-maps */
12553
12554 tmp_entry = vm_map_copy_first_entry(copy);
12555 copy_size = 0; /* compute actual size */
12556 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12557 assert(VM_MAP_PAGE_ALIGNED(
12558 copy_addr + (tmp_entry->vme_end -
12559 tmp_entry->vme_start),
12560 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12561 assert(VM_MAP_PAGE_ALIGNED(
12562 copy_addr,
12563 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12564
12565 /*
12566 * The copy_entries will be injected directly into the
12567 * destination map and might not be "map aligned" there...
12568 */
12569 tmp_entry->map_aligned = FALSE;
12570
12571 tmp_entry->vme_end = copy_addr +
12572 (tmp_entry->vme_end - tmp_entry->vme_start);
12573 tmp_entry->vme_start = copy_addr;
12574 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12575 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12576 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12577 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12578 }
12579
12580 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12581 copy_size < copy->size) {
12582 /*
12583 * The actual size of the VM map copy is smaller than what
12584 * was requested by the caller. This must be because some
12585 * PAGE_SIZE-sized pages are missing at the end of the last
12586 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12587 * The caller might not have been aware of those missing
12588 * pages and might not want to be aware of it, which is
12589 * fine as long as they don't try to access (and crash on)
12590 * those missing pages.
12591 * Let's adjust the size of the "copy", to avoid failing
12592 * in vm_map_copyout() or vm_map_copy_overwrite().
12593 */
12594 assert(vm_map_round_page(copy_size,
12595 VM_MAP_PAGE_MASK(src_map)) ==
12596 vm_map_round_page(copy->size,
12597 VM_MAP_PAGE_MASK(src_map)));
12598 copy->size = copy_size;
12599 }
12600
12601 *copy_result = copy;
12602 return KERN_SUCCESS;
12603
12604 #undef RETURN
12605 }
12606
12607 kern_return_t
12608 vm_map_copy_extract(
12609 vm_map_t src_map,
12610 vm_map_address_t src_addr,
12611 vm_map_size_t len,
12612 boolean_t do_copy,
12613 vm_map_copy_t *copy_result, /* OUT */
12614 vm_prot_t *cur_prot, /* IN/OUT */
12615 vm_prot_t *max_prot, /* IN/OUT */
12616 vm_inherit_t inheritance,
12617 vm_map_kernel_flags_t vmk_flags)
12618 {
12619 vm_map_copy_t copy;
12620 kern_return_t kr;
12621 vm_prot_t required_cur_prot, required_max_prot;
12622
12623 /*
12624 * Check for copies of zero bytes.
12625 */
12626
12627 if (len == 0) {
12628 *copy_result = VM_MAP_COPY_NULL;
12629 return KERN_SUCCESS;
12630 }
12631
12632 /*
12633 * Check that the end address doesn't overflow
12634 */
12635 if (src_addr + len < src_addr) {
12636 return KERN_INVALID_ADDRESS;
12637 }
12638 if (__improbable(vm_map_range_overflows(src_map, src_addr, len))) {
12639 return KERN_INVALID_ADDRESS;
12640 }
12641
12642 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12643 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12644 }
12645
12646 required_cur_prot = *cur_prot;
12647 required_max_prot = *max_prot;
12648
12649 /*
12650 * Allocate a header element for the list.
12651 *
12652 * Use the start and end in the header to
12653 * remember the endpoints prior to rounding.
12654 */
12655
12656 copy = vm_map_copy_allocate(VM_MAP_COPY_ENTRY_LIST);
12657 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12658 copy->offset = 0;
12659 copy->size = len;
12660
12661 kr = vm_map_remap_extract(src_map,
12662 src_addr,
12663 len,
12664 do_copy, /* copy */
12665 copy,
12666 cur_prot, /* IN/OUT */
12667 max_prot, /* IN/OUT */
12668 inheritance,
12669 vmk_flags);
12670 if (kr != KERN_SUCCESS) {
12671 vm_map_copy_discard(copy);
12672 return kr;
12673 }
12674 if (required_cur_prot != VM_PROT_NONE) {
12675 assert((*cur_prot & required_cur_prot) == required_cur_prot);
12676 assert((*max_prot & required_max_prot) == required_max_prot);
12677 }
12678
12679 *copy_result = copy;
12680 return KERN_SUCCESS;
12681 }
12682
12683 static void
12684 vm_map_fork_share(
12685 vm_map_t old_map,
12686 vm_map_entry_t old_entry,
12687 vm_map_t new_map)
12688 {
12689 vm_object_t object;
12690 vm_map_entry_t new_entry;
12691
12692 /*
12693 * New sharing code. New map entry
12694 * references original object. Internal
12695 * objects use asynchronous copy algorithm for
12696 * future copies. First make sure we have
12697 * the right object. If we need a shadow,
12698 * or someone else already has one, then
12699 * make a new shadow and share it.
12700 */
12701
12702 if (!old_entry->is_sub_map) {
12703 object = VME_OBJECT(old_entry);
12704 }
12705
12706 if (old_entry->is_sub_map) {
12707 assert(old_entry->wired_count == 0);
12708 #ifndef NO_NESTED_PMAP
12709 #if !PMAP_FORK_NEST
12710 if (old_entry->use_pmap) {
12711 kern_return_t result;
12712
12713 result = pmap_nest(new_map->pmap,
12714 (VME_SUBMAP(old_entry))->pmap,
12715 (addr64_t)old_entry->vme_start,
12716 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12717 if (result) {
12718 panic("vm_map_fork_share: pmap_nest failed!");
12719 }
12720 }
12721 #endif /* !PMAP_FORK_NEST */
12722 #endif /* NO_NESTED_PMAP */
12723 } else if (object == VM_OBJECT_NULL) {
12724 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12725 old_entry->vme_start));
12726 VME_OFFSET_SET(old_entry, 0);
12727 VME_OBJECT_SET(old_entry, object, false, 0);
12728 old_entry->use_pmap = TRUE;
12729 // assert(!old_entry->needs_copy);
12730 } else if (object->copy_strategy !=
12731 MEMORY_OBJECT_COPY_SYMMETRIC) {
12732 /*
12733 * We are already using an asymmetric
12734 * copy, and therefore we already have
12735 * the right object.
12736 */
12737
12738 assert(!old_entry->needs_copy);
12739 } else if (old_entry->needs_copy || /* case 1 */
12740 object->shadowed || /* case 2 */
12741 (!object->true_share && /* case 3 */
12742 !old_entry->is_shared &&
12743 (object->vo_size >
12744 (vm_map_size_t)(old_entry->vme_end -
12745 old_entry->vme_start)))) {
12746 /*
12747 * We need to create a shadow.
12748 * There are three cases here.
12749 * In the first case, we need to
12750 * complete a deferred symmetrical
12751 * copy that we participated in.
12752 * In the second and third cases,
12753 * we need to create the shadow so
12754 * that changes that we make to the
12755 * object do not interfere with
12756 * any symmetrical copies which
12757 * have occurred (case 2) or which
12758 * might occur (case 3).
12759 *
12760 * The first case is when we had
12761 * deferred shadow object creation
12762 * via the entry->needs_copy mechanism.
12763 * This mechanism only works when
12764 * only one entry points to the source
12765 * object, and we are about to create
12766 * a second entry pointing to the
12767 * same object. The problem is that
12768 * there is no way of mapping from
12769 * an object to the entries pointing
12770 * to it. (Deferred shadow creation
12771 * works with one entry because it occurs
12772 * at fault time, and we walk from the
12773 * entry to the object when handling
12774 * the fault.)
12775 *
12776 * The second case is when the object
12777 * to be shared has already been copied
12778 * with a symmetric copy, but we point
12779 * directly to the object without
12780 * needs_copy set in our entry. (This
12781 * can happen because different ranges
12782 * of an object can be pointed to by
12783 * different entries. In particular,
12784 * a single entry pointing to an object
12785 * can be split by a call to vm_inherit,
12786 * which, combined with task_create, can
12787 * result in the different entries
12788 * having different needs_copy values.)
12789 * The shadowed flag in the object allows
12790 * us to detect this case. The problem
12791 * with this case is that if this object
12792 * has or will have shadows, then we
12793 * must not perform an asymmetric copy
12794 * of this object, since such a copy
12795 * allows the object to be changed, which
12796 * will break the previous symmetrical
12797 * copies (which rely upon the object
12798 * not changing). In a sense, the shadowed
12799 * flag says "don't change this object".
12800 * We fix this by creating a shadow
12801 * object for this object, and sharing
12802 * that. This works because we are free
12803 * to change the shadow object (and thus
12804 * to use an asymmetric copy strategy);
12805 * this is also semantically correct,
12806 * since this object is temporary, and
12807 * therefore a copy of the object is
12808 * as good as the object itself. (This
12809 * is not true for permanent objects,
12810 * since the pager needs to see changes,
12811 * which won't happen if the changes
12812 * are made to a copy.)
12813 *
12814 * The third case is when the object
12815 * to be shared has parts sticking
12816 * outside of the entry we're working
12817 * with, and thus may in the future
12818 * be subject to a symmetrical copy.
12819 * (This is a preemptive version of
12820 * case 2.)
12821 */
12822 VME_OBJECT_SHADOW(old_entry,
12823 (vm_map_size_t) (old_entry->vme_end -
12824 old_entry->vme_start),
12825 vm_map_always_shadow(old_map));
12826
12827 /*
12828 * If we're making a shadow for other than
12829 * copy on write reasons, then we have
12830 * to remove write permission.
12831 */
12832
12833 if (!old_entry->needs_copy &&
12834 (old_entry->protection & VM_PROT_WRITE)) {
12835 vm_prot_t prot;
12836
12837 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12838
12839 prot = old_entry->protection & ~VM_PROT_WRITE;
12840
12841 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12842
12843 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12844 prot |= VM_PROT_EXECUTE;
12845 }
12846
12847
12848 if (old_map->mapped_in_other_pmaps) {
12849 vm_object_pmap_protect(
12850 VME_OBJECT(old_entry),
12851 VME_OFFSET(old_entry),
12852 (old_entry->vme_end -
12853 old_entry->vme_start),
12854 PMAP_NULL,
12855 PAGE_SIZE,
12856 old_entry->vme_start,
12857 prot);
12858 } else {
12859 pmap_protect(old_map->pmap,
12860 old_entry->vme_start,
12861 old_entry->vme_end,
12862 prot);
12863 }
12864 }
12865
12866 old_entry->needs_copy = FALSE;
12867 object = VME_OBJECT(old_entry);
12868 }
12869
12870
12871 /*
12872 * If object was using a symmetric copy strategy,
12873 * change its copy strategy to the default
12874 * asymmetric copy strategy, which is copy_delay
12875 * in the non-norma case and copy_call in the
12876 * norma case. Bump the reference count for the
12877 * new entry.
12878 */
12879
12880 if (old_entry->is_sub_map) {
12881 vm_map_reference(VME_SUBMAP(old_entry));
12882 } else {
12883 vm_object_lock(object);
12884 vm_object_reference_locked(object);
12885 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12886 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12887 }
12888 vm_object_unlock(object);
12889 }
12890
12891 /*
12892 * Clone the entry, using object ref from above.
12893 * Mark both entries as shared.
12894 */
12895
12896 new_entry = vm_map_entry_create(new_map); /* Never the kernel map or descendants */
12897 vm_map_entry_copy(old_map, new_entry, old_entry);
12898 old_entry->is_shared = TRUE;
12899 new_entry->is_shared = TRUE;
12900
12901 /*
12902 * We're dealing with a shared mapping, so the resulting mapping
12903 * should inherit some of the original mapping's accounting settings.
12904 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12905 * "use_pmap" should stay the same as before (if it hasn't been reset
12906 * to TRUE when we cleared "iokit_acct").
12907 */
12908 assert(!new_entry->iokit_acct);
12909
12910 /*
12911 * If the old entry's inheritance is VM_INHERIT_NONE,
12912 * the new entry is for a corpse fork, so remove the
12913 * write permission from the new entry.
12914 */
12915 if (old_entry->inheritance == VM_INHERIT_NONE) {
12916 new_entry->protection &= ~VM_PROT_WRITE;
12917 new_entry->max_protection &= ~VM_PROT_WRITE;
12918 }
12919
12920 /*
12921 * Insert the entry into the new map -- we
12922 * know we're inserting at the end of the new
12923 * map.
12924 */
12925
12926 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12927 VM_MAP_KERNEL_FLAGS_NONE);
12928
12929 /*
12930 * Update the physical map
12931 */
12932
12933 if (old_entry->is_sub_map) {
12934 /* Bill Angell pmap support goes here */
12935 } else {
12936 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12937 old_entry->vme_end - old_entry->vme_start,
12938 old_entry->vme_start);
12939 }
12940 }
12941
12942 static boolean_t
12943 vm_map_fork_copy(
12944 vm_map_t old_map,
12945 vm_map_entry_t *old_entry_p,
12946 vm_map_t new_map,
12947 int vm_map_copyin_flags)
12948 {
12949 vm_map_entry_t old_entry = *old_entry_p;
12950 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12951 vm_map_offset_t start = old_entry->vme_start;
12952 vm_map_copy_t copy;
12953 vm_map_entry_t last = vm_map_last_entry(new_map);
12954
12955 vm_map_unlock(old_map);
12956 /*
12957 * Use maxprot version of copyin because we
12958 * care about whether this memory can ever
12959 * be accessed, not just whether it's accessible
12960 * right now.
12961 */
12962 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12963 if (vm_map_copyin_internal(old_map, start, entry_size,
12964 vm_map_copyin_flags, &copy)
12965 != KERN_SUCCESS) {
12966 /*
12967 * The map might have changed while it
12968 * was unlocked; check it again. Skip
12969 * any blank space or permanently
12970 * unreadable region.
12971 */
12972 vm_map_lock(old_map);
12973 if (!vm_map_lookup_entry(old_map, start, &last) ||
12974 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12975 last = last->vme_next;
12976 }
12977 *old_entry_p = last;
12978
12979 /*
12980 * XXX For some error returns, want to
12981 * XXX skip to the next element. Note
12982 * that INVALID_ADDRESS and
12983 * PROTECTION_FAILURE are handled above.
12984 */
12985
12986 return FALSE;
12987 }
12988
12989 /*
12990 * Assert that the vm_map_copy is coming from the right
12991 * zone and hasn't been forged
12992 */
12993 vm_map_copy_require(copy);
12994
12995 /*
12996 * Insert the copy into the new map
12997 */
12998 vm_map_copy_insert(new_map, last, copy);
12999
13000 /*
13001 * Pick up the traversal at the end of
13002 * the copied region.
13003 */
13004
13005 vm_map_lock(old_map);
13006 start += entry_size;
13007 if (!vm_map_lookup_entry(old_map, start, &last)) {
13008 last = last->vme_next;
13009 } else {
13010 if (last->vme_start == start) {
13011 /*
13012 * No need to clip here and we don't
13013 * want to cause any unnecessary
13014 * unnesting...
13015 */
13016 } else {
13017 vm_map_clip_start(old_map, last, start);
13018 }
13019 }
13020 *old_entry_p = last;
13021
13022 return TRUE;
13023 }
13024
13025 #if PMAP_FORK_NEST
13026 #define PMAP_FORK_NEST_DEBUG 0
13027 static inline void
13028 vm_map_fork_unnest(
13029 pmap_t new_pmap,
13030 vm_map_offset_t pre_nested_start,
13031 vm_map_offset_t pre_nested_end,
13032 vm_map_offset_t start,
13033 vm_map_offset_t end)
13034 {
13035 kern_return_t kr;
13036 vm_map_offset_t nesting_mask, start_unnest, end_unnest;
13037
13038 assertf(pre_nested_start <= pre_nested_end,
13039 "pre_nested start 0x%llx end 0x%llx",
13040 (uint64_t)pre_nested_start, (uint64_t)pre_nested_end);
13041 assertf(start <= end,
13042 "start 0x%llx end 0x%llx",
13043 (uint64_t) start, (uint64_t)end);
13044
13045 if (pre_nested_start == pre_nested_end) {
13046 /* nothing was pre-nested: done */
13047 return;
13048 }
13049 if (end <= pre_nested_start) {
13050 /* fully before pre-nested range: done */
13051 return;
13052 }
13053 if (start >= pre_nested_end) {
13054 /* fully after pre-nested range: done */
13055 return;
13056 }
13057 /* ignore parts of range outside of pre_nested range */
13058 if (start < pre_nested_start) {
13059 start = pre_nested_start;
13060 }
13061 if (end > pre_nested_end) {
13062 end = pre_nested_end;
13063 }
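/* widen to whole shared-region nesting granules so pmap_unnest() sees complete segments */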
13064 nesting_mask = pmap_shared_region_size_min(new_pmap) - 1;
13065 start_unnest = start & ~nesting_mask;
13066 end_unnest = (end + nesting_mask) & ~nesting_mask;
13067 kr = pmap_unnest(new_pmap,
13068 (addr64_t)start_unnest,
13069 (uint64_t)(end_unnest - start_unnest));
13070 #if PMAP_FORK_NEST_DEBUG
13071 printf("PMAP_FORK_NEST %s:%d new_pmap %p 0x%llx:0x%llx -> pmap_unnest 0x%llx:0x%llx kr 0x%x\n", __FUNCTION__, __LINE__, new_pmap, (uint64_t)start, (uint64_t)end, (uint64_t)start_unnest, (uint64_t)end_unnest, kr);
13072 #endif /* PMAP_FORK_NEST_DEBUG */
13073 assertf(kr == KERN_SUCCESS,
13074 "0x%llx 0x%llx pmap_unnest(%p, 0x%llx, 0x%llx) -> 0x%x",
13075 (uint64_t)start, (uint64_t)end, new_pmap,
13076 (uint64_t)start_unnest, (uint64_t)(end_unnest - start_unnest),
13077 kr);
13078 }
13079 #endif /* PMAP_FORK_NEST */
13080
13081 void
13082 vm_map_inherit_limits(vm_map_t new_map, const struct _vm_map *old_map)
13083 {
13084 new_map->size_limit = old_map->size_limit;
13085 new_map->data_limit = old_map->data_limit;
13086 new_map->user_wire_limit = old_map->user_wire_limit;
13087 new_map->reserved_regions = old_map->reserved_regions;
13088 }
13089
13090 /*
13091 * vm_map_fork:
13092 *
13093 * Create and return a new map based on the old
13094 * map, according to the inheritance values on the
13095 * regions in that map and the options.
13096 *
13097 * The source map must not be locked.
13098 */
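/*
 * Illustrative sketch of a fork-time caller (hypothetical names:
 * "child_ledger" and "parent_map" stand in for the task-layer state;
 * the real callers live outside this file):
 *
 *	vm_map_t child_map;
 *
 *	child_map = vm_map_fork(child_ledger, parent_map,
 *	    VM_MAP_FORK_PRESERVE_PURGEABLE);
 *	if (child_map == VM_MAP_NULL) {
 *		return KERN_RESOURCE_SHORTAGE;
 *	}
 */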
13099 vm_map_t
13100 vm_map_fork(
13101 ledger_t ledger,
13102 vm_map_t old_map,
13103 int options)
13104 {
13105 pmap_t new_pmap;
13106 vm_map_t new_map;
13107 vm_map_entry_t old_entry;
13108 vm_map_size_t new_size = 0, entry_size;
13109 vm_map_entry_t new_entry;
13110 boolean_t src_needs_copy;
13111 boolean_t new_entry_needs_copy;
13112 boolean_t pmap_is64bit;
13113 int vm_map_copyin_flags;
13114 vm_inherit_t old_entry_inheritance;
13115 int map_create_options;
13116 kern_return_t footprint_collect_kr;
13117
13118 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
13119 VM_MAP_FORK_PRESERVE_PURGEABLE |
13120 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
13121 /* unsupported option */
13122 return VM_MAP_NULL;
13123 }
13124
13125 pmap_is64bit =
13126 #if defined(__i386__) || defined(__x86_64__)
13127 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
13128 #elif defined(__arm64__)
13129 old_map->pmap->is_64bit;
13130 #else
13131 #error Unknown architecture.
13132 #endif
13133
13134 unsigned int pmap_flags = 0;
13135 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
13136 #if defined(HAS_APPLE_PAC)
13137 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
13138 #endif
13139 #if CONFIG_ROSETTA
13140 pmap_flags |= old_map->pmap->is_rosetta ? PMAP_CREATE_ROSETTA : 0;
13141 #endif
13142 #if PMAP_CREATE_FORCE_4K_PAGES
13143 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
13144 PAGE_SIZE != FOURK_PAGE_SIZE) {
13145 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
13146 }
13147 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
13148 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
13149 if (new_pmap == NULL) {
13150 return VM_MAP_NULL;
13151 }
13152
13153 vm_map_reference(old_map);
13154 vm_map_lock(old_map);
13155
13156 map_create_options = 0;
13157 if (old_map->hdr.entries_pageable) {
13158 map_create_options |= VM_MAP_CREATE_PAGEABLE;
13159 }
13160 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13161 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
13162 footprint_collect_kr = KERN_SUCCESS;
13163 }
13164 new_map = vm_map_create_options(new_pmap,
13165 old_map->min_offset,
13166 old_map->max_offset,
13167 map_create_options);
13168
13169 /* inherit cs_enforcement */
13170 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
13171
13172 vm_map_lock(new_map);
13173 vm_commit_pagezero_status(new_map);
13174 /* inherit the parent map's page size */
13175 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
13176
13177 /* inherit the parent rlimits */
13178 vm_map_inherit_limits(new_map, old_map);
13179
13180 #if CONFIG_MAP_RANGES
13181 /* inherit the parent map's VM ranges */
13182 vm_map_range_fork(new_map, old_map);
13183 #endif
13184
13185 #if CODE_SIGNING_MONITOR
13186 /* Prepare the monitor for the fork */
13187 csm_fork_prepare(old_map->pmap, new_pmap);
13188 #endif
13189
13190 #if PMAP_FORK_NEST
13191 /*
13192 * Pre-nest the shared region's pmap.
13193 */
13194 vm_map_offset_t pre_nested_start = 0, pre_nested_end = 0;
13195 pmap_fork_nest(old_map->pmap, new_pmap,
13196 &pre_nested_start, &pre_nested_end);
13197 #if PMAP_FORK_NEST_DEBUG
13198 printf("PMAP_FORK_NEST %s:%d old %p new %p pre_nested start 0x%llx end 0x%llx\n", __FUNCTION__, __LINE__, old_map->pmap, new_pmap, (uint64_t)pre_nested_start, (uint64_t)pre_nested_end);
13199 #endif /* PMAP_FORK_NEST_DEBUG */
13200 #endif /* PMAP_FORK_NEST */
13201
13202 for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
13203 /*
13204 * Abort any corpse collection if the system is shutting down.
13205 */
13206 if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
13207 get_system_inshutdown()) {
13208 #if PMAP_FORK_NEST
13209 new_entry = vm_map_last_entry(new_map);
13210 if (new_entry == vm_map_to_entry(new_map)) {
13211 /* unnest all that was pre-nested */
13212 vm_map_fork_unnest(new_pmap,
13213 pre_nested_start, pre_nested_end,
13214 vm_map_min(new_map), vm_map_max(new_map));
13215 } else if (new_entry->vme_end < vm_map_max(new_map)) {
13216 /* unnest hole at the end, if pre-nested */
13217 vm_map_fork_unnest(new_pmap,
13218 pre_nested_start, pre_nested_end,
13219 new_entry->vme_end, vm_map_max(new_map));
13220 }
13221 #endif /* PMAP_FORK_NEST */
13222 vm_map_corpse_footprint_collect_done(new_map);
13223 vm_map_unlock(new_map);
13224 vm_map_unlock(old_map);
13225 vm_map_deallocate(new_map);
13226 vm_map_deallocate(old_map);
13227 printf("Aborting corpse map due to system shutdown\n");
13228 return VM_MAP_NULL;
13229 }
13230
13231 entry_size = old_entry->vme_end - old_entry->vme_start;
13232
13233 #if PMAP_FORK_NEST
13234 /*
13235 * Undo any unnecessary pre-nesting.
13236 */
13237 vm_map_offset_t prev_end;
13238 if (old_entry == vm_map_first_entry(old_map)) {
13239 prev_end = vm_map_min(old_map);
13240 } else {
13241 prev_end = old_entry->vme_prev->vme_end;
13242 }
13243 if (prev_end < old_entry->vme_start) {
13244 /* unnest hole before this entry, if pre-nested */
13245 vm_map_fork_unnest(new_pmap,
13246 pre_nested_start, pre_nested_end,
13247 prev_end, old_entry->vme_start);
13248 }
13249 if (old_entry->is_sub_map && old_entry->use_pmap) {
13250 /* keep this entry nested in the child */
13251 #if PMAP_FORK_NEST_DEBUG
13252 printf("PMAP_FORK_NEST %s:%d new_pmap %p keeping 0x%llx:0x%llx nested\n", __FUNCTION__, __LINE__, new_pmap, (uint64_t)old_entry->vme_start, (uint64_t)old_entry->vme_end);
13253 #endif /* PMAP_FORK_NEST_DEBUG */
13254 } else {
13255 /* undo nesting for this entry, if pre-nested */
13256 vm_map_fork_unnest(new_pmap,
13257 pre_nested_start, pre_nested_end,
13258 old_entry->vme_start, old_entry->vme_end);
13259 }
13260 #endif /* PMAP_FORK_NEST */
13261
13262 old_entry_inheritance = old_entry->inheritance;
13263 /*
13264	 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
13265 * share VM_INHERIT_NONE entries that are not backed by a
13266 * device pager.
13267 */
13268 if (old_entry_inheritance == VM_INHERIT_NONE &&
13269 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
13270 (old_entry->protection & VM_PROT_READ) &&
13271 !(!old_entry->is_sub_map &&
13272 VME_OBJECT(old_entry) != NULL &&
13273 VME_OBJECT(old_entry)->pager != NULL &&
13274 is_device_pager_ops(
13275 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
13276 old_entry_inheritance = VM_INHERIT_SHARE;
13277 }
13278
13279 if (old_entry_inheritance != VM_INHERIT_NONE &&
13280 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
13281 footprint_collect_kr == KERN_SUCCESS) {
13282 /*
13283 * The corpse won't have old_map->pmap to query
13284 * footprint information, so collect that data now
13285 * and store it in new_map->vmmap_corpse_footprint
13286 * for later autopsy.
13287 */
13288 footprint_collect_kr =
13289 vm_map_corpse_footprint_collect(old_map,
13290 old_entry,
13291 new_map);
13292 }
13293
13294 switch (old_entry_inheritance) {
13295 case VM_INHERIT_NONE:
13296 break;
13297
13298 case VM_INHERIT_SHARE:
13299 vm_map_fork_share(old_map, old_entry, new_map);
13300 new_size += entry_size;
13301 break;
13302
13303 case VM_INHERIT_COPY:
13304
13305 /*
13306 * Inline the copy_quickly case;
13307	 * upon failure, fall back on a call
13308 * to vm_map_fork_copy.
13309 */
13310
13311 if (old_entry->is_sub_map) {
13312 break;
13313 }
13314 if ((old_entry->wired_count != 0) ||
13315 ((VME_OBJECT(old_entry) != NULL) &&
13316 (VME_OBJECT(old_entry)->true_share))) {
13317 goto slow_vm_map_fork_copy;
13318 }
13319
13320 new_entry = vm_map_entry_create(new_map); /* never the kernel map or descendants */
13321 vm_map_entry_copy(old_map, new_entry, old_entry);
13322 if (old_entry->vme_permanent) {
13323 /* inherit "permanent" on fork() */
13324 new_entry->vme_permanent = TRUE;
13325 }
13326
13327 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
13328 new_map->jit_entry_exists = TRUE;
13329 }
13330
13331 if (new_entry->is_sub_map) {
13332 /* clear address space specifics */
13333 new_entry->use_pmap = FALSE;
13334 } else {
13335 /*
13336 * We're dealing with a copy-on-write operation,
13337 * so the resulting mapping should not inherit
13338 * the original mapping's accounting settings.
13339 * "iokit_acct" should have been cleared in
13340 * vm_map_entry_copy().
13341 * "use_pmap" should be reset to its default
13342 * (TRUE) so that the new mapping gets
13343 * accounted for in the task's memory footprint.
13344 */
13345 assert(!new_entry->iokit_acct);
13346 new_entry->use_pmap = TRUE;
13347 }
13348
13349 if (!vm_object_copy_quickly(
13350 VME_OBJECT(new_entry),
13351 VME_OFFSET(old_entry),
13352 (old_entry->vme_end -
13353 old_entry->vme_start),
13354 &src_needs_copy,
13355 &new_entry_needs_copy)) {
13356 vm_map_entry_dispose(new_entry);
13357 goto slow_vm_map_fork_copy;
13358 }
13359
13360 /*
13361 * Handle copy-on-write obligations
13362 */
13363
13364 if (src_needs_copy && !old_entry->needs_copy) {
13365 vm_prot_t prot;
13366
13367 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13368
13369 prot = old_entry->protection & ~VM_PROT_WRITE;
13370
13371 if (override_nx(old_map, VME_ALIAS(old_entry))
13372 && prot) {
13373 prot |= VM_PROT_EXECUTE;
13374 }
13375
13376 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13377
13378 vm_object_pmap_protect(
13379 VME_OBJECT(old_entry),
13380 VME_OFFSET(old_entry),
13381 (old_entry->vme_end -
13382 old_entry->vme_start),
13383 ((old_entry->is_shared
13384 || old_map->mapped_in_other_pmaps)
13385 ? PMAP_NULL :
13386 old_map->pmap),
13387 VM_MAP_PAGE_SIZE(old_map),
13388 old_entry->vme_start,
13389 prot);
13390
13391 assert(old_entry->wired_count == 0);
13392 old_entry->needs_copy = TRUE;
13393 }
13394 new_entry->needs_copy = new_entry_needs_copy;
13395
13396 /*
13397 * Insert the entry at the end
13398 * of the map.
13399 */
13400
13401 vm_map_store_entry_link(new_map,
13402 vm_map_last_entry(new_map),
13403 new_entry,
13404 VM_MAP_KERNEL_FLAGS_NONE);
13405 new_size += entry_size;
13406 break;
13407
13408 slow_vm_map_fork_copy:
13409 vm_map_copyin_flags = 0;
13410 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13411 vm_map_copyin_flags |=
13412 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13413 }
13414 if (vm_map_fork_copy(old_map,
13415 &old_entry,
13416 new_map,
13417 vm_map_copyin_flags)) {
13418 new_size += entry_size;
13419 }
13420 continue;
13421 }
13422 old_entry = old_entry->vme_next;
13423 }
13424
13425 #if PMAP_FORK_NEST
13426 new_entry = vm_map_last_entry(new_map);
13427 if (new_entry == vm_map_to_entry(new_map)) {
13428 /* unnest all that was pre-nested */
13429 vm_map_fork_unnest(new_pmap,
13430 pre_nested_start, pre_nested_end,
13431 vm_map_min(new_map), vm_map_max(new_map));
13432 } else if (new_entry->vme_end < vm_map_max(new_map)) {
13433 /* unnest hole at the end, if pre-nested */
13434 vm_map_fork_unnest(new_pmap,
13435 pre_nested_start, pre_nested_end,
13436 new_entry->vme_end, vm_map_max(new_map));
13437 }
13438 #endif /* PMAP_FORK_NEST */
13439
13440 #if defined(__arm64__)
13441 pmap_insert_commpage(new_map->pmap);
13442 #endif /* __arm64__ */
13443
13444 new_map->size = new_size;
13445
13446 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13447 vm_map_corpse_footprint_collect_done(new_map);
13448 }
13449
13450 /* Propagate JIT entitlement for the pmap layer. */
13451 if (pmap_get_jit_entitled(old_map->pmap)) {
13452 /* Tell the pmap that it supports JIT. */
13453 pmap_set_jit_entitled(new_map->pmap);
13454 }
13455
13456 /* Propagate TPRO settings for the pmap layer */
13457 if (pmap_get_tpro(old_map->pmap)) {
13458 /* Tell the pmap that it supports TPRO */
13459 pmap_set_tpro(new_map->pmap);
13460 }
13461
13462 vm_map_unlock(new_map);
13463 vm_map_unlock(old_map);
13464 vm_map_deallocate(old_map);
13465
13466 return new_map;
13467 }
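
/*
 * Illustrative caller sketch (not part of the build): roughly how a
 * corpse-creation path could drive vm_map_fork(). The option combination
 * and the surrounding task/ledger plumbing are assumptions for the sake
 * of the example, not a statement about the actual callers.
 *
 *	int fork_options = VM_MAP_FORK_CORPSE_FOOTPRINT |
 *	    VM_MAP_FORK_SHARE_IF_INHERIT_NONE;
 *	vm_map_t corpse_map = vm_map_fork(ledger, task_map, fork_options);
 *	if (corpse_map == VM_MAP_NULL) {
 *		// unsupported option, pmap creation failure,
 *		// or the fork was aborted because of a system shutdown
 *	}
 */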
13468
13469 /*
13470 * vm_map_exec:
13471 *
13472 * Setup the "new_map" with the proper execution environment according
13473 * to the type of executable (platform, 64bit, chroot environment).
13474 * Map the comm page and shared region, etc...
13475 */
13476 kern_return_t
13477 vm_map_exec(
13478 vm_map_t new_map,
13479 task_t task,
13480 boolean_t is64bit,
13481 void *fsroot,
13482 cpu_type_t cpu,
13483 cpu_subtype_t cpu_subtype,
13484 boolean_t reslide,
13485 boolean_t is_driverkit,
13486 uint32_t rsr_version)
13487 {
13488 SHARED_REGION_TRACE_DEBUG(
13489 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13490 (void *)VM_KERNEL_ADDRPERM(current_task()),
13491 (void *)VM_KERNEL_ADDRPERM(new_map),
13492 (void *)VM_KERNEL_ADDRPERM(task),
13493 (void *)VM_KERNEL_ADDRPERM(fsroot),
13494 cpu,
13495 cpu_subtype));
13496 (void) vm_commpage_enter(new_map, task, is64bit);
13497
13498 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit, rsr_version);
13499
13500 SHARED_REGION_TRACE_DEBUG(
13501 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13502 (void *)VM_KERNEL_ADDRPERM(current_task()),
13503 (void *)VM_KERNEL_ADDRPERM(new_map),
13504 (void *)VM_KERNEL_ADDRPERM(task),
13505 (void *)VM_KERNEL_ADDRPERM(fsroot),
13506 cpu,
13507 cpu_subtype));
13508
13509 /*
13510 * Some devices have region(s) of memory that shouldn't get allocated by
13511 * user processes. The following code creates dummy vm_map_entry_t's for each
13512	 * of the regions that need to be reserved to prevent any allocations in
13513 * those regions.
13514 */
13515 kern_return_t kr = KERN_FAILURE;
13516 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT();
13517 vmk_flags.vmkf_beyond_max = true;
13518
13520	const struct vm_reserved_region *regions = NULL;
	size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13521 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13522
13523 for (size_t i = 0; i < num_regions; ++i) {
13524 vm_map_offset_t address = regions[i].vmrr_addr;
13525
13526 kr = vm_map_enter(
13527 new_map,
13528 &address,
13529 regions[i].vmrr_size,
13530 (vm_map_offset_t)0,
13531 vmk_flags,
13532 VM_OBJECT_NULL,
13533 (vm_object_offset_t)0,
13534 FALSE,
13535 VM_PROT_NONE,
13536 VM_PROT_NONE,
13537 VM_INHERIT_COPY);
13538
13539 if (kr != KERN_SUCCESS) {
13540 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13541 }
13542 }
13543
13544 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13545
13546 return KERN_SUCCESS;
13547 }
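
/*
 * Usage sketch (an assumption, simplified from the exec path): the Mach-O
 * loader calls vm_map_exec() on the freshly created map before mapping any
 * segments, so the comm page, shared region and reserved regions are in
 * place first. The argument names below are placeholders.
 *
 *	kr = vm_map_exec(new_map, task, is_64bit_addr, fsroot, cputype,
 *	    cpusubtype, reslide, is_driverkit, rsr_version);
 *	assert(kr == KERN_SUCCESS);
 */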
13548
13549 uint64_t vm_map_lookup_and_lock_object_copy_slowly_count = 0;
13550 uint64_t vm_map_lookup_and_lock_object_copy_slowly_size = 0;
13551 uint64_t vm_map_lookup_and_lock_object_copy_slowly_max = 0;
13552 uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart = 0;
13553 uint64_t vm_map_lookup_and_lock_object_copy_slowly_error = 0;
13554 uint64_t vm_map_lookup_and_lock_object_copy_strategically_count = 0;
13555 uint64_t vm_map_lookup_and_lock_object_copy_strategically_size = 0;
13556 uint64_t vm_map_lookup_and_lock_object_copy_strategically_max = 0;
13557 uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart = 0;
13558 uint64_t vm_map_lookup_and_lock_object_copy_strategically_error = 0;
13559 uint64_t vm_map_lookup_and_lock_object_copy_shadow_count = 0;
13560 uint64_t vm_map_lookup_and_lock_object_copy_shadow_size = 0;
13561 uint64_t vm_map_lookup_and_lock_object_copy_shadow_max = 0;
13562 /*
13563 * vm_map_lookup_and_lock_object:
13564 *
13565 * Finds the VM object, offset, and
13566 * protection for a given virtual address in the
13567 * specified map, assuming a page fault of the
13568 * type specified.
13569 *
13570 * Returns the (object, offset, protection) for
13571 * this address, whether it is wired down, and whether
13572 * this map has the only reference to the data in question.
13573 * In order to later verify this lookup, a "version"
13574 * is returned.
13575 * If contended != NULL, *contended will be set to
13576 * true iff the thread had to spin or block to acquire
13577 * an exclusive lock.
13578 *
13579 * The map MUST be locked by the caller and WILL be
13580 * locked on exit. In order to guarantee the
13581 * existence of the returned object, it is returned
13582 * locked.
13583 *
13584 * If a lookup is requested with "write protection"
13585 * specified, the map may be changed to perform virtual
13586 * copying operations, although the data referenced will
13587 * remain the same.
13588 */
13589 kern_return_t
13590 vm_map_lookup_and_lock_object(
13591 vm_map_t *var_map, /* IN/OUT */
13592 vm_map_offset_t vaddr,
13593 vm_prot_t fault_type,
13594 int object_lock_type,
13595 vm_map_version_t *out_version, /* OUT */
13596 vm_object_t *object, /* OUT */
13597 vm_object_offset_t *offset, /* OUT */
13598 vm_prot_t *out_prot, /* OUT */
13599 boolean_t *wired, /* OUT */
13600 vm_object_fault_info_t fault_info, /* OUT */
13601 vm_map_t *real_map, /* OUT */
13602 bool *contended) /* OUT */
13603 {
13604 vm_map_entry_t entry;
13605 vm_map_t map = *var_map;
13606 vm_map_t old_map = *var_map;
13607 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13608 vm_map_offset_t cow_parent_vaddr = 0;
13609 vm_map_offset_t old_start = 0;
13610 vm_map_offset_t old_end = 0;
13611 vm_prot_t prot;
13612 boolean_t mask_protections;
13613 boolean_t force_copy;
13614 boolean_t no_force_copy_if_executable;
13615 boolean_t submap_needed_copy;
13616 vm_prot_t original_fault_type;
13617 vm_map_size_t fault_page_mask;
13618
13619 /*
13620	 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
13621 * as a mask against the mapping's actual protections, not as an
13622 * absolute value.
13623 */
13624 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13625 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13626 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13627 fault_type &= VM_PROT_ALL;
13628 original_fault_type = fault_type;
13629 if (contended) {
13630 *contended = false;
13631 }
13632
13633 *real_map = map;
13634
13635 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13636 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13637
13638 RetryLookup:
13639 fault_type = original_fault_type;
13640
13641 /*
13642 * If the map has an interesting hint, try it before calling
13643 * full blown lookup routine.
13644 */
13645 entry = map->hint;
13646
13647 if ((entry == vm_map_to_entry(map)) ||
13648 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13649 vm_map_entry_t tmp_entry;
13650
13651 /*
13652 * Entry was either not a valid hint, or the vaddr
13653 * was not contained in the entry, so do a full lookup.
13654 */
13655 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13656 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13657 vm_map_unlock(cow_sub_map_parent);
13658 }
13659 if ((*real_map != map)
13660 && (*real_map != cow_sub_map_parent)) {
13661 vm_map_unlock(*real_map);
13662 }
13663 return KERN_INVALID_ADDRESS;
13664 }
13665
13666 entry = tmp_entry;
13667 }
13668 if (map == old_map) {
13669 old_start = entry->vme_start;
13670 old_end = entry->vme_end;
13671 }
13672
13673 /*
13674 * Handle submaps. Drop lock on upper map, submap is
13675 * returned locked.
13676 */
13677
13678 submap_needed_copy = FALSE;
13679 submap_recurse:
13680 if (entry->is_sub_map) {
13681 vm_map_offset_t local_vaddr;
13682 vm_map_offset_t end_delta;
13683 vm_map_offset_t start_delta;
13684 vm_map_offset_t top_entry_saved_start;
13685 vm_object_offset_t top_entry_saved_offset;
13686 vm_map_entry_t submap_entry, saved_submap_entry;
13687 vm_object_offset_t submap_entry_offset;
13688 vm_object_size_t submap_entry_size;
13689 vm_prot_t subentry_protection;
13690 vm_prot_t subentry_max_protection;
13691 boolean_t subentry_no_copy_on_read;
13692 boolean_t subentry_permanent;
13693 boolean_t subentry_csm_associated;
13694 boolean_t mapped_needs_copy = FALSE;
13695 vm_map_version_t version;
13696
13697 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13698 "map %p (%d) entry %p submap %p (%d)\n",
13699 map, VM_MAP_PAGE_SHIFT(map), entry,
13700 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13701
13702 local_vaddr = vaddr;
13703 top_entry_saved_start = entry->vme_start;
13704 top_entry_saved_offset = VME_OFFSET(entry);
13705
13706 if ((entry->use_pmap &&
13707 !((fault_type & VM_PROT_WRITE) ||
13708 force_copy))) {
13709 /* if real_map equals map we unlock below */
13710 if ((*real_map != map) &&
13711 (*real_map != cow_sub_map_parent)) {
13712 vm_map_unlock(*real_map);
13713 }
13714 *real_map = VME_SUBMAP(entry);
13715 }
13716
13717 if (entry->needs_copy &&
13718 ((fault_type & VM_PROT_WRITE) ||
13719 force_copy)) {
13720 if (!mapped_needs_copy) {
13721 if (vm_map_lock_read_to_write(map)) {
13722 vm_map_lock_read(map);
13723 *real_map = map;
13724 goto RetryLookup;
13725 }
13726 vm_map_lock_read(VME_SUBMAP(entry));
13727 *var_map = VME_SUBMAP(entry);
13728 cow_sub_map_parent = map;
13729 /* reset base to map before cow object */
13730 /* this is the map which will accept */
13731 /* the new cow object */
13732 old_start = entry->vme_start;
13733 old_end = entry->vme_end;
13734 cow_parent_vaddr = vaddr;
13735 mapped_needs_copy = TRUE;
13736 } else {
13737 vm_map_lock_read(VME_SUBMAP(entry));
13738 *var_map = VME_SUBMAP(entry);
13739 if ((cow_sub_map_parent != map) &&
13740 (*real_map != map)) {
13741 vm_map_unlock(map);
13742 }
13743 }
13744 } else {
13745 if (entry->needs_copy) {
13746 submap_needed_copy = TRUE;
13747 }
13748 vm_map_lock_read(VME_SUBMAP(entry));
13749 *var_map = VME_SUBMAP(entry);
13750			/* Leave the map locked if it is the target */
13751			/* cow sub_map above; otherwise, just */
13752			/* follow the maps down to the object. */
13753			/* Here we unlock, knowing we are not */
13754			/* revisiting the map. */
13755 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13756 vm_map_unlock_read(map);
13757 }
13758 }
13759
13760 entry = NULL;
13761 map = *var_map;
13762
13763 /* calculate the offset in the submap for vaddr */
13764 local_vaddr = (local_vaddr - top_entry_saved_start) + top_entry_saved_offset;
13765 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13766 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13767 (uint64_t)local_vaddr, (uint64_t)top_entry_saved_start, (uint64_t)fault_page_mask);
13768
13769 RetrySubMap:
13770 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13771 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13772 vm_map_unlock(cow_sub_map_parent);
13773 }
13774 if ((*real_map != map)
13775 && (*real_map != cow_sub_map_parent)) {
13776 vm_map_unlock(*real_map);
13777 }
13778 *real_map = map;
13779 return KERN_INVALID_ADDRESS;
13780 }
13781
13782 /* find the attenuated shadow of the underlying object */
13783 /* on our target map */
13784
13785		/* In plain English: the submap object may extend beyond the */
13786		/* region mapped by the entry, or may only fill a portion */
13787		/* of it. For our purposes, we only care if the object */
13788		/* doesn't fill it completely. In that case the area which will */
13789		/* ultimately be clipped in the top map only needs */
13790		/* to be as big as the portion of the underlying entry */
13791		/* which is actually mapped. */
13792 start_delta = submap_entry->vme_start > top_entry_saved_offset ?
13793 submap_entry->vme_start - top_entry_saved_offset : 0;
13794
13795 end_delta =
13796 (top_entry_saved_offset + start_delta + (old_end - old_start)) <=
13797 submap_entry->vme_end ?
13798 0 : (top_entry_saved_offset +
13799 (old_end - old_start))
13800 - submap_entry->vme_end;
13801
13802 old_start += start_delta;
13803 old_end -= end_delta;
13804
13805 if (submap_entry->is_sub_map) {
13806 entry = submap_entry;
13807 vaddr = local_vaddr;
13808 goto submap_recurse;
13809 }
13810
13811 if (((fault_type & VM_PROT_WRITE) ||
13812 force_copy)
13813 && cow_sub_map_parent) {
13814 vm_object_t sub_object, copy_object;
13815 vm_object_offset_t copy_offset;
13816 vm_map_offset_t local_start;
13817 vm_map_offset_t local_end;
13818 boolean_t object_copied = FALSE;
13819 vm_object_offset_t object_copied_offset = 0;
13820 boolean_t object_copied_needs_copy = FALSE;
13821 kern_return_t kr = KERN_SUCCESS;
13822
13823 if (vm_map_lock_read_to_write(map)) {
13824 vm_map_lock_read(map);
13825 old_start -= start_delta;
13826 old_end += end_delta;
13827 goto RetrySubMap;
13828 }
13829
13830
13831 sub_object = VME_OBJECT(submap_entry);
13832 if (sub_object == VM_OBJECT_NULL) {
13833 sub_object =
13834 vm_object_allocate(
13835 (vm_map_size_t)
13836 (submap_entry->vme_end -
13837 submap_entry->vme_start));
13838 VME_OBJECT_SET(submap_entry, sub_object, false, 0);
13839 VME_OFFSET_SET(submap_entry, 0);
13840 assert(!submap_entry->is_sub_map);
13841 assert(submap_entry->use_pmap);
13842 }
13843 local_start = local_vaddr -
13844 (cow_parent_vaddr - old_start);
13845 local_end = local_vaddr +
13846 (old_end - cow_parent_vaddr);
13847 vm_map_clip_start(map, submap_entry, local_start);
13848 vm_map_clip_end(map, submap_entry, local_end);
13849 if (submap_entry->is_sub_map) {
13850 /* unnesting was done when clipping */
13851 assert(!submap_entry->use_pmap);
13852 }
13853
13854			/* This is the COW case, let's connect */
13855 /* an entry in our space to the underlying */
13856 /* object in the submap, bypassing the */
13857 /* submap. */
13858 submap_entry_offset = VME_OFFSET(submap_entry);
13859 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13860
13861 if ((submap_entry->wired_count != 0 ||
13862 sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13863 (submap_entry->protection & VM_PROT_EXECUTE) &&
13864 no_force_copy_if_executable) {
13865 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13866 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13867 vm_map_unlock(cow_sub_map_parent);
13868 }
13869 if ((*real_map != map)
13870 && (*real_map != cow_sub_map_parent)) {
13871 vm_map_unlock(*real_map);
13872 }
13873 *real_map = map;
13874 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13875 vm_map_lock_write_to_read(map);
13876 kr = KERN_PROTECTION_FAILURE;
13877 DTRACE_VM4(submap_no_copy_executable,
13878 vm_map_t, map,
13879 vm_object_offset_t, submap_entry_offset,
13880 vm_object_size_t, submap_entry_size,
13881 int, kr);
13882 return kr;
13883 }
13884
13885 if (submap_entry->wired_count != 0) {
13886 vm_object_reference(sub_object);
13887
13888 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13889 "submap_entry %p offset 0x%llx\n",
13890 submap_entry, VME_OFFSET(submap_entry));
13891
13892 DTRACE_VM6(submap_copy_slowly,
13893 vm_map_t, cow_sub_map_parent,
13894 vm_map_offset_t, vaddr,
13895 vm_map_t, map,
13896 vm_object_size_t, submap_entry_size,
13897 int, submap_entry->wired_count,
13898 int, sub_object->copy_strategy);
13899
13900 saved_submap_entry = submap_entry;
13901 version.main_timestamp = map->timestamp;
13902 vm_map_unlock(map); /* Increments timestamp by 1 */
13903 submap_entry = VM_MAP_ENTRY_NULL;
13904
13905 vm_object_lock(sub_object);
13906 kr = vm_object_copy_slowly(sub_object,
13907 submap_entry_offset,
13908 submap_entry_size,
13909 FALSE,
13910				    &copy_object);
13911 object_copied = TRUE;
13912 object_copied_offset = 0;
13913 /* 4k: account for extra offset in physical page */
13914 object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13915 object_copied_needs_copy = FALSE;
13916 vm_object_deallocate(sub_object);
13917
13918 vm_map_lock(map);
13919
13920 if (kr != KERN_SUCCESS &&
13921 kr != KERN_MEMORY_RESTART_COPY) {
13922 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13923 vm_map_unlock(cow_sub_map_parent);
13924 }
13925 if ((*real_map != map)
13926 && (*real_map != cow_sub_map_parent)) {
13927 vm_map_unlock(*real_map);
13928 }
13929 *real_map = map;
13930 vm_object_deallocate(copy_object);
13931 copy_object = VM_OBJECT_NULL;
13932 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13933 vm_map_lock_write_to_read(map);
13934 DTRACE_VM4(submap_copy_error_slowly,
13935 vm_object_t, sub_object,
13936 vm_object_offset_t, submap_entry_offset,
13937 vm_object_size_t, submap_entry_size,
13938 int, kr);
13939 vm_map_lookup_and_lock_object_copy_slowly_error++;
13940 return kr;
13941 }
13942
13943 if ((kr == KERN_SUCCESS) &&
13944 (version.main_timestamp + 1) == map->timestamp) {
13945 submap_entry = saved_submap_entry;
13946 } else {
13947 saved_submap_entry = NULL;
13948 old_start -= start_delta;
13949 old_end += end_delta;
13950 vm_object_deallocate(copy_object);
13951 copy_object = VM_OBJECT_NULL;
13952 vm_map_lock_write_to_read(map);
13953 vm_map_lookup_and_lock_object_copy_slowly_restart++;
13954 goto RetrySubMap;
13955 }
13956 vm_map_lookup_and_lock_object_copy_slowly_count++;
13957 vm_map_lookup_and_lock_object_copy_slowly_size += submap_entry_size;
13958 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_slowly_max) {
13959 vm_map_lookup_and_lock_object_copy_slowly_max = submap_entry_size;
13960 }
13961 } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13962 submap_entry_offset = VME_OFFSET(submap_entry);
13963 copy_object = VM_OBJECT_NULL;
13964 object_copied_offset = submap_entry_offset;
13965 object_copied_needs_copy = FALSE;
13966 DTRACE_VM6(submap_copy_strategically,
13967 vm_map_t, cow_sub_map_parent,
13968 vm_map_offset_t, vaddr,
13969 vm_map_t, map,
13970 vm_object_size_t, submap_entry_size,
13971 int, submap_entry->wired_count,
13972 int, sub_object->copy_strategy);
13973 kr = vm_object_copy_strategically(
13974 sub_object,
13975 submap_entry_offset,
13976 submap_entry->vme_end - submap_entry->vme_start,
13977				    &copy_object,
13978 &object_copied_offset,
13979 &object_copied_needs_copy);
13980 if (kr == KERN_MEMORY_RESTART_COPY) {
13981 old_start -= start_delta;
13982 old_end += end_delta;
13983 vm_object_deallocate(copy_object);
13984 copy_object = VM_OBJECT_NULL;
13985 vm_map_lock_write_to_read(map);
13986 vm_map_lookup_and_lock_object_copy_strategically_restart++;
13987 goto RetrySubMap;
13988 }
13989 if (kr != KERN_SUCCESS) {
13990 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13991 vm_map_unlock(cow_sub_map_parent);
13992 }
13993 if ((*real_map != map)
13994 && (*real_map != cow_sub_map_parent)) {
13995 vm_map_unlock(*real_map);
13996 }
13997 *real_map = map;
13998 vm_object_deallocate(copy_object);
13999 copy_object = VM_OBJECT_NULL;
14000 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
14001 vm_map_lock_write_to_read(map);
14002 DTRACE_VM4(submap_copy_error_strategically,
14003 vm_object_t, sub_object,
14004 vm_object_offset_t, submap_entry_offset,
14005 vm_object_size_t, submap_entry_size,
14006 int, kr);
14007 vm_map_lookup_and_lock_object_copy_strategically_error++;
14008 return kr;
14009 }
14010 assert(copy_object != VM_OBJECT_NULL);
14011 assert(copy_object != sub_object);
14012 object_copied = TRUE;
14013 vm_map_lookup_and_lock_object_copy_strategically_count++;
14014 vm_map_lookup_and_lock_object_copy_strategically_size += submap_entry_size;
14015 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_strategically_max) {
14016 vm_map_lookup_and_lock_object_copy_strategically_max = submap_entry_size;
14017 }
14018 } else {
14019 /* set up shadow object */
14020 object_copied = FALSE;
14021 copy_object = sub_object;
14022 vm_object_lock(sub_object);
14023 vm_object_reference_locked(sub_object);
14024 sub_object->shadowed = TRUE;
14025 vm_object_unlock(sub_object);
14026
14027 assert(submap_entry->wired_count == 0);
14028 submap_entry->needs_copy = TRUE;
14029
14030 prot = submap_entry->protection;
14031 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
14032 prot = prot & ~VM_PROT_WRITE;
14033 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
14034
14035 if (override_nx(old_map,
14036 VME_ALIAS(submap_entry))
14037 && prot) {
14038 prot |= VM_PROT_EXECUTE;
14039 }
14040
14041 vm_object_pmap_protect(
14042 sub_object,
14043 VME_OFFSET(submap_entry),
14044 submap_entry->vme_end -
14045 submap_entry->vme_start,
14046 (submap_entry->is_shared
14047 || map->mapped_in_other_pmaps) ?
14048 PMAP_NULL : map->pmap,
14049 VM_MAP_PAGE_SIZE(map),
14050 submap_entry->vme_start,
14051 prot);
14052 vm_map_lookup_and_lock_object_copy_shadow_count++;
14053 vm_map_lookup_and_lock_object_copy_shadow_size += submap_entry_size;
14054 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_shadow_max) {
14055 vm_map_lookup_and_lock_object_copy_shadow_max = submap_entry_size;
14056 }
14057 }
14058
14059 /*
14060 * Adjust the fault offset to the submap entry.
14061 */
14062 copy_offset = (local_vaddr -
14063 submap_entry->vme_start +
14064 VME_OFFSET(submap_entry));
14065
14066			/* This works differently than the */
14067 /* normal submap case. We go back */
14068 /* to the parent of the cow map and*/
14069 /* clip out the target portion of */
14070 /* the sub_map, substituting the */
14071 /* new copy object, */
14072
14073 subentry_protection = submap_entry->protection;
14074 subentry_max_protection = submap_entry->max_protection;
14075 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
14076 subentry_permanent = submap_entry->vme_permanent;
14077 subentry_csm_associated = submap_entry->csm_associated;
14078
14079 vm_map_unlock(map);
14080 submap_entry = NULL; /* not valid after map unlock */
14081
14082 local_start = old_start;
14083 local_end = old_end;
14084 map = cow_sub_map_parent;
14085 *var_map = cow_sub_map_parent;
14086 vaddr = cow_parent_vaddr;
14087 cow_sub_map_parent = NULL;
14088
14089 if (!vm_map_lookup_entry(map,
14090 vaddr, &entry)) {
14091 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
14092 vm_map_unlock(cow_sub_map_parent);
14093 }
14094 if ((*real_map != map)
14095 && (*real_map != cow_sub_map_parent)) {
14096 vm_map_unlock(*real_map);
14097 }
14098 *real_map = map;
14099 vm_object_deallocate(
14100 copy_object);
14101 copy_object = VM_OBJECT_NULL;
14102 vm_map_lock_write_to_read(map);
14103 DTRACE_VM4(submap_lookup_post_unlock,
14104 uint64_t, (uint64_t)entry->vme_start,
14105 uint64_t, (uint64_t)entry->vme_end,
14106 vm_map_offset_t, vaddr,
14107 int, object_copied);
14108 return KERN_INVALID_ADDRESS;
14109 }
14110
14111 /* clip out the portion of space */
14112 /* mapped by the sub map which */
14113 /* corresponds to the underlying */
14114 /* object */
14115
14116 /*
14117 * Clip (and unnest) the smallest nested chunk
14118 * possible around the faulting address...
14119 */
14120 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
14121 local_end = local_start + pmap_shared_region_size_min(map->pmap);
14122 /*
14123 * ... but don't go beyond the "old_start" to "old_end"
14124 * range, to avoid spanning over another VM region
14125 * with a possibly different VM object and/or offset.
14126 */
14127 if (local_start < old_start) {
14128 local_start = old_start;
14129 }
14130 if (local_end > old_end) {
14131 local_end = old_end;
14132 }
14133 /*
14134 * Adjust copy_offset to the start of the range.
14135 */
14136 copy_offset -= (vaddr - local_start);
14137
14138 vm_map_clip_start(map, entry, local_start);
14139 vm_map_clip_end(map, entry, local_end);
14140 if (entry->is_sub_map) {
14141 /* unnesting was done when clipping */
14142 assert(!entry->use_pmap);
14143 }
14144
14145 /* substitute copy object for */
14146 /* shared map entry */
14147 vm_map_deallocate(VME_SUBMAP(entry));
14148 assert(!entry->iokit_acct);
14149 entry->use_pmap = TRUE;
14150 VME_OBJECT_SET(entry, copy_object, false, 0);
14151
14152 /* propagate the submap entry's protections */
14153 if (entry->protection != VM_PROT_READ) {
14154 /*
14155 * Someone has already altered the top entry's
14156 * protections via vm_protect(VM_PROT_COPY).
14157 * Respect these new values and ignore the
14158 * submap entry's protections.
14159 */
14160 } else {
14161 /*
14162 * Regular copy-on-write: propagate the submap
14163 * entry's protections to the top map entry.
14164 */
14165 entry->protection |= subentry_protection;
14166 }
14167 entry->max_protection |= subentry_max_protection;
14168 /* propagate some attributes from subentry */
14169 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
14170 entry->vme_permanent = subentry_permanent;
14171 entry->csm_associated = subentry_csm_associated;
14172
14173 if ((entry->protection & VM_PROT_WRITE) &&
14174 (entry->protection & VM_PROT_EXECUTE) &&
14175 #if XNU_TARGET_OS_OSX
14176 map->pmap != kernel_pmap &&
14177 (vm_map_cs_enforcement(map)
14178 #if __arm64__
14179 || !VM_MAP_IS_EXOTIC(map)
14180 #endif /* __arm64__ */
14181 ) &&
14182 #endif /* XNU_TARGET_OS_OSX */
14183 #if CODE_SIGNING_MONITOR
14184 (csm_address_space_exempt(map->pmap) != KERN_SUCCESS) &&
14185 #endif
14186 !(entry->used_for_jit) &&
14187 VM_MAP_POLICY_WX_STRIP_X(map)) {
14188 DTRACE_VM3(cs_wx,
14189 uint64_t, (uint64_t)entry->vme_start,
14190 uint64_t, (uint64_t)entry->vme_end,
14191 vm_prot_t, entry->protection);
14192 printf("CODE SIGNING: %d[%s] %s:%d(0x%llx,0x%llx,0x%x) can't have both write and exec at the same time\n",
14193 proc_selfpid(),
14194 (get_bsdtask_info(current_task())
14195 ? proc_name_address(get_bsdtask_info(current_task()))
14196 : "?"),
14197 __FUNCTION__, __LINE__,
14198 #if DEVELOPMENT || DEBUG
14199 (uint64_t)entry->vme_start,
14200 (uint64_t)entry->vme_end,
14201 #else /* DEVELOPMENT || DEBUG */
14202 (uint64_t)0,
14203 (uint64_t)0,
14204 #endif /* DEVELOPMENT || DEBUG */
14205 entry->protection);
14206 entry->protection &= ~VM_PROT_EXECUTE;
14207 }
14208
14209 if (object_copied) {
14210 VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
14211 entry->needs_copy = object_copied_needs_copy;
14212 entry->is_shared = FALSE;
14213 } else {
14214 assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
14215 assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
14216 assert(entry->wired_count == 0);
14217 VME_OFFSET_SET(entry, copy_offset);
14218 entry->needs_copy = TRUE;
14219 if (map != old_map) {
14220 entry->is_shared = TRUE;
14221 }
14222 }
14223 if (entry->inheritance == VM_INHERIT_SHARE) {
14224 entry->inheritance = VM_INHERIT_COPY;
14225 }
14226
14227 vm_map_lock_write_to_read(map);
14228 } else {
14229 if ((cow_sub_map_parent)
14230 && (cow_sub_map_parent != *real_map)
14231 && (cow_sub_map_parent != map)) {
14232 vm_map_unlock(cow_sub_map_parent);
14233 }
14234 entry = submap_entry;
14235 vaddr = local_vaddr;
14236 }
14237 }
14238
14239 /*
14240 * Check whether this task is allowed to have
14241 * this page.
14242 */
14243
14244 prot = entry->protection;
14245
14246 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
14247 /*
14248 * HACK -- if not a stack, then allow execution
14249 */
14250 prot |= VM_PROT_EXECUTE;
14251 }
14252
14253 if (mask_protections) {
14254 fault_type &= prot;
14255 if (fault_type == VM_PROT_NONE) {
14256 goto protection_failure;
14257 }
14258 }
14259 if (((fault_type & prot) != fault_type)
14260 #if __arm64__
14261 /* prefetch abort in execute-only page */
14262 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
14263 #elif defined(__x86_64__)
14264 /* Consider the UEXEC bit when handling an EXECUTE fault */
14265 && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
14266 #endif
14267 ) {
14268 protection_failure:
14269 if (*real_map != map) {
14270 vm_map_unlock(*real_map);
14271 }
14272 *real_map = map;
14273
14274 if ((fault_type & VM_PROT_EXECUTE) && prot) {
14275 log_stack_execution_failure((addr64_t)vaddr, prot);
14276 }
14277
14278 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
14279 DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
14280 /*
14281 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
14282 *
14283 * ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
14284 */
14285 return KERN_PROTECTION_FAILURE;
14286 }
14287
14288 /*
14289 * If this page is not pageable, we have to get
14290 * it for all possible accesses.
14291 */
14292
14293 *wired = (entry->wired_count != 0);
14294 if (*wired) {
14295 fault_type = prot;
14296 }
14297
14298 /*
14299	 * If the entry was copy-on-write, either resolve the copy now or demote the allowed permissions.
14300 */
14301
14302 if (entry->needs_copy) {
14303 /*
14304 * If we want to write the page, we may as well
14305 * handle that now since we've got the map locked.
14306 *
14307 * If we don't need to write the page, we just
14308 * demote the permissions allowed.
14309 */
14310
14311 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
14312 /*
14313 * Make a new object, and place it in the
14314 * object chain. Note that no new references
14315 * have appeared -- one just moved from the
14316 * map to the new object.
14317 */
14318
14319 if (vm_map_lock_read_to_write(map)) {
14320 vm_map_lock_read(map);
14321 goto RetryLookup;
14322 }
14323
14324 if (VME_OBJECT(entry)->shadowed == FALSE) {
14325 vm_object_lock(VME_OBJECT(entry));
14326 VME_OBJECT(entry)->shadowed = TRUE;
14327 vm_object_unlock(VME_OBJECT(entry));
14328 }
14329 VME_OBJECT_SHADOW(entry,
14330 (vm_map_size_t) (entry->vme_end -
14331 entry->vme_start),
14332 vm_map_always_shadow(map));
14333 entry->needs_copy = FALSE;
14334
14335 vm_map_lock_write_to_read(map);
14336 }
14337 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
14338 /*
14339 * We're attempting to read a copy-on-write
14340 * page -- don't allow writes.
14341 */
14342
14343 prot &= (~VM_PROT_WRITE);
14344 }
14345 }
14346
14347 if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
14348 /*
14349 * We went through a "needs_copy" submap without triggering
14350 * a copy, so granting write access to the page would bypass
14351 * that submap's "needs_copy".
14352 */
14353 assert(!(fault_type & VM_PROT_WRITE));
14354 assert(!*wired);
14355 assert(!force_copy);
14356 // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
14357 prot &= ~VM_PROT_WRITE;
14358 }
14359
14360 /*
14361 * Create an object if necessary.
14362 */
14363 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
14364 if (vm_map_lock_read_to_write(map)) {
14365 vm_map_lock_read(map);
14366 goto RetryLookup;
14367 }
14368
14369 VME_OBJECT_SET(entry,
14370 vm_object_allocate(
14371 (vm_map_size_t)(entry->vme_end -
14372 entry->vme_start)), false, 0);
14373 VME_OFFSET_SET(entry, 0);
14374 assert(entry->use_pmap);
14375 vm_map_lock_write_to_read(map);
14376 }
14377
14378 /*
14379 * Return the object/offset from this entry. If the entry
14380 * was copy-on-write or empty, it has been fixed up. Also
14381 * return the protection.
14382 */
14383
14384 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
14385 *object = VME_OBJECT(entry);
14386 *out_prot = prot;
14387 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
14388
14389 if (fault_info) {
14390 fault_info->interruptible = THREAD_UNINT; /* for now... */
14391 /* ... the caller will change "interruptible" if needed */
14392 fault_info->cluster_size = 0;
14393 fault_info->user_tag = VME_ALIAS(entry);
14394 fault_info->pmap_options = 0;
14395 if (entry->iokit_acct ||
14396 (!entry->is_sub_map && !entry->use_pmap)) {
14397 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14398 }
14399 fault_info->behavior = entry->behavior;
14400 fault_info->lo_offset = VME_OFFSET(entry);
14401 fault_info->hi_offset =
14402 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
14403 fault_info->no_cache = entry->no_cache;
14404 fault_info->stealth = FALSE;
14405 fault_info->io_sync = FALSE;
14406 if (entry->used_for_jit ||
14407 #if CODE_SIGNING_MONITOR
14408 (csm_address_space_exempt(map->pmap) == KERN_SUCCESS) ||
14409 #endif
14410 entry->vme_resilient_codesign) {
14411 fault_info->cs_bypass = TRUE;
14412 } else {
14413 fault_info->cs_bypass = FALSE;
14414 }
14415 fault_info->csm_associated = FALSE;
14416 #if CODE_SIGNING_MONITOR
14417 if (entry->csm_associated) {
14418 /*
14419 * The pmap layer will validate this page
14420 * before allowing it to be executed from.
14421 */
14422 fault_info->csm_associated = TRUE;
14423 }
14424 #endif
14425 fault_info->mark_zf_absent = FALSE;
14426 fault_info->batch_pmap_op = FALSE;
14427 fault_info->resilient_media = entry->vme_resilient_media;
14428 fault_info->fi_xnu_user_debug = entry->vme_xnu_user_debug;
14429 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14430 if (entry->translated_allow_execute) {
14431 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14432 }
14433 }
14434
14435 /*
14436 * Lock the object to prevent it from disappearing
14437 */
14438 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14439 if (contended == NULL) {
14440 vm_object_lock(*object);
14441 } else {
14442 *contended = vm_object_lock_check_contended(*object);
14443 }
14444 } else {
14445 vm_object_lock_shared(*object);
14446 }
14447
14448 /*
14449 * Save the version number
14450 */
14451
14452 out_version->main_timestamp = map->timestamp;
14453
14454 return KERN_SUCCESS;
14455 }
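
/*
 * Minimal usage sketch (an assumption, loosely modeled on the fault path,
 * not a definitive caller): the map must be locked on entry, the object
 * comes back locked, and "real_map" may differ from "map" when submaps
 * are involved.
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_and_lock_object(&map, vaddr, VM_PROT_READ,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot,
 *	    &wired, &fault_info, &real_map, NULL);
 *	if (kr == KERN_SUCCESS) {
 *		// ... resolve the fault against (object, offset) ...
 *		vm_object_unlock(object);
 *		if (real_map != map) {
 *			vm_map_unlock(real_map);
 *		}
 *	}
 *	vm_map_unlock_read(map);
 */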
14456
14457
14458 /*
14459 * vm_map_verify:
14460 *
14461 * Verifies that the map in question has not changed
14462 * since the given version. The map has to be locked
14463 * ("shared" mode is fine) before calling this function
14464 * and it will be returned locked too.
14465 */
14466 boolean_t
14467 vm_map_verify(
14468 vm_map_t map,
14469 vm_map_version_t *version) /* REF */
14470 {
14471 boolean_t result;
14472
14473 vm_map_lock_assert_held(map);
14474 result = (map->timestamp == version->main_timestamp);
14475
14476 return result;
14477 }
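
/*
 * Typical lookup/verify pattern (sketch, with a hypothetical retry label):
 * the version comes from an earlier vm_map_lookup_and_lock_object() call.
 * After dropping the map lock to do blocking work, retake it and verify
 * that the map was not changed in the meantime, retrying the lookup
 * otherwise.
 *
 *	vm_map_unlock_read(map);
 *	// ... blocking work, e.g. zero-fill or page-in ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		goto RetryLookup;	// hypothetical label in the caller
 *	}
 */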
14478
14479 /*
14480 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14481 * Goes away after regular vm_region_recurse function migrates to
14482 * 64 bits
14483 * vm_region_recurse: A form of vm_region which follows the
14484 * submaps in a target map
14485 *
14486 */
14487
14488 kern_return_t
14489 vm_map_region_recurse_64(
14490 vm_map_t map,
14491 vm_map_offset_t *address, /* IN/OUT */
14492 vm_map_size_t *size, /* OUT */
14493 natural_t *nesting_depth, /* IN/OUT */
14494 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14495 mach_msg_type_number_t *count) /* IN/OUT */
14496 {
14497 mach_msg_type_number_t original_count;
14498 vm_region_extended_info_data_t extended;
14499 vm_map_entry_t tmp_entry;
14500 vm_map_offset_t user_address;
14501 unsigned int user_max_depth;
14502
14503 /*
14504 * "curr_entry" is the VM map entry preceding or including the
14505 * address we're looking for.
14506 * "curr_map" is the map or sub-map containing "curr_entry".
14507 * "curr_address" is the equivalent of the top map's "user_address"
14508 * in the current map.
14509 * "curr_offset" is the cumulated offset of "curr_map" in the
14510 * target task's address space.
14511 * "curr_depth" is the depth of "curr_map" in the chain of
14512 * sub-maps.
14513 *
14514 * "curr_max_below" and "curr_max_above" limit the range (around
14515 * "curr_address") we should take into account in the current (sub)map.
14516 * They limit the range to what's visible through the map entries
14517 * we've traversed from the top map to the current map.
14518 *
14519 */
14520 vm_map_entry_t curr_entry;
14521 vm_map_address_t curr_address;
14522 vm_map_offset_t curr_offset;
14523 vm_map_t curr_map;
14524 unsigned int curr_depth;
14525 vm_map_offset_t curr_max_below, curr_max_above;
14526 vm_map_offset_t curr_skip;
14527
14528 /*
14529 * "next_" is the same as "curr_" but for the VM region immediately
14530 * after the address we're looking for. We need to keep track of this
14531 * too because we want to return info about that region if the
14532 * address we're looking for is not mapped.
14533 */
14534 vm_map_entry_t next_entry;
14535 vm_map_offset_t next_offset;
14536 vm_map_offset_t next_address;
14537 vm_map_t next_map;
14538 unsigned int next_depth;
14539 vm_map_offset_t next_max_below, next_max_above;
14540 vm_map_offset_t next_skip;
14541
14542 boolean_t look_for_pages;
14543 vm_region_submap_short_info_64_t short_info;
14544 boolean_t do_region_footprint;
14545 int effective_page_size, effective_page_shift;
14546 boolean_t submap_needed_copy;
14547
14548 if (map == VM_MAP_NULL) {
14549 /* no address space to work on */
14550 return KERN_INVALID_ARGUMENT;
14551 }
14552
14553 effective_page_shift = vm_self_region_page_shift(map);
14554 effective_page_size = (1 << effective_page_shift);
14555
14556 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14557 /*
14558 * "info" structure is not big enough and
14559 * would overflow
14560 */
14561 return KERN_INVALID_ARGUMENT;
14562 }
14563
14564 do_region_footprint = task_self_region_footprint();
14565 original_count = *count;
14566
14567 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14568 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14569 look_for_pages = FALSE;
14570 short_info = (vm_region_submap_short_info_64_t) submap_info;
14571 submap_info = NULL;
14572 } else {
14573 look_for_pages = TRUE;
14574 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14575 short_info = NULL;
14576
14577 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14578 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14579 }
14580 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14581 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14582 }
14583 }
14584
14585 user_address = *address;
14586 user_max_depth = *nesting_depth;
14587 submap_needed_copy = FALSE;
14588
14589 if (not_in_kdp) {
14590 vm_map_lock_read(map);
14591 }
14592
14593 recurse_again:
14594 curr_entry = NULL;
14595 curr_map = map;
14596 curr_address = user_address;
14597 curr_offset = 0;
14598 curr_skip = 0;
14599 curr_depth = 0;
14600 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14601 curr_max_below = curr_address;
14602
14603 next_entry = NULL;
14604 next_map = NULL;
14605 next_address = 0;
14606 next_offset = 0;
14607 next_skip = 0;
14608 next_depth = 0;
14609 next_max_above = (vm_map_offset_t) -1;
14610 next_max_below = (vm_map_offset_t) -1;
14611
14612 for (;;) {
14613 if (vm_map_lookup_entry(curr_map,
14614 curr_address,
14615 &tmp_entry)) {
14616 /* tmp_entry contains the address we're looking for */
14617 curr_entry = tmp_entry;
14618 } else {
14619 vm_map_offset_t skip;
14620 /*
14621 * The address is not mapped. "tmp_entry" is the
14622 * map entry preceding the address. We want the next
14623 * one, if it exists.
14624 */
14625 curr_entry = tmp_entry->vme_next;
14626
14627 if (curr_entry == vm_map_to_entry(curr_map) ||
14628 (curr_entry->vme_start >=
14629 curr_address + curr_max_above)) {
14630 /* no next entry at this level: stop looking */
14631 if (not_in_kdp) {
14632 vm_map_unlock_read(curr_map);
14633 }
14634 curr_entry = NULL;
14635 curr_map = NULL;
14636 curr_skip = 0;
14637 curr_offset = 0;
14638 curr_depth = 0;
14639 curr_max_above = 0;
14640 curr_max_below = 0;
14641 break;
14642 }
14643
14644 /* adjust current address and offset */
14645 skip = curr_entry->vme_start - curr_address;
14646 curr_address = curr_entry->vme_start;
14647 curr_skip += skip;
14648 curr_offset += skip;
14649 curr_max_above -= skip;
14650 curr_max_below = 0;
14651 }
14652
14653 /*
14654 * Is the next entry at this level closer to the address (or
14655 * deeper in the submap chain) than the one we had
14656 * so far ?
14657 */
14658 tmp_entry = curr_entry->vme_next;
14659 if (tmp_entry == vm_map_to_entry(curr_map)) {
14660 /* no next entry at this level */
14661 } else if (tmp_entry->vme_start >=
14662 curr_address + curr_max_above) {
14663 /*
14664 * tmp_entry is beyond the scope of what we mapped of
14665 * this submap in the upper level: ignore it.
14666 */
14667 } else if ((next_entry == NULL) ||
14668 (tmp_entry->vme_start + curr_offset <=
14669 next_entry->vme_start + next_offset)) {
14670 /*
14671 * We didn't have a "next_entry" or this one is
14672 * closer to the address we're looking for:
14673 * use this "tmp_entry" as the new "next_entry".
14674 */
14675 if (next_entry != NULL) {
14676 /* unlock the last "next_map" */
14677 if (next_map != curr_map && not_in_kdp) {
14678 vm_map_unlock_read(next_map);
14679 }
14680 }
14681 next_entry = tmp_entry;
14682 next_map = curr_map;
14683 next_depth = curr_depth;
14684 next_address = next_entry->vme_start;
14685 next_skip = curr_skip;
14686 next_skip += (next_address - curr_address);
14687 next_offset = curr_offset;
14688 next_offset += (next_address - curr_address);
14689 next_max_above = MIN(next_max_above, curr_max_above);
14690 next_max_above = MIN(next_max_above,
14691 next_entry->vme_end - next_address);
14692 next_max_below = MIN(next_max_below, curr_max_below);
14693 next_max_below = MIN(next_max_below,
14694 next_address - next_entry->vme_start);
14695 }
14696
14697 /*
14698 * "curr_max_{above,below}" allow us to keep track of the
14699 * portion of the submap that is actually mapped at this level:
14700 * the rest of that submap is irrelevant to us, since it's not
14701 * mapped here.
14702 * The relevant portion of the map starts at
14703 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14704 */
14705 curr_max_above = MIN(curr_max_above,
14706 curr_entry->vme_end - curr_address);
14707 curr_max_below = MIN(curr_max_below,
14708 curr_address - curr_entry->vme_start);
14709
14710 if (!curr_entry->is_sub_map ||
14711 curr_depth >= user_max_depth) {
14712 /*
14713 * We hit a leaf map or we reached the maximum depth
14714 * we could, so stop looking. Keep the current map
14715 * locked.
14716 */
14717 break;
14718 }
14719
14720 /*
14721 * Get down to the next submap level.
14722 */
14723
14724 if (curr_entry->needs_copy) {
14725 /* everything below this is effectively copy-on-write */
14726 submap_needed_copy = TRUE;
14727 }
14728
14729 /*
14730 * Lock the next level and unlock the current level,
14731 * unless we need to keep it locked to access the "next_entry"
14732 * later.
14733 */
14734 if (not_in_kdp) {
14735 vm_map_lock_read(VME_SUBMAP(curr_entry));
14736 }
14737 if (curr_map == next_map) {
14738 /* keep "next_map" locked in case we need it */
14739 } else {
14740 /* release this map */
14741 if (not_in_kdp) {
14742 vm_map_unlock_read(curr_map);
14743 }
14744 }
14745
14746 /*
14747 * Adjust the offset. "curr_entry" maps the submap
14748 * at relative address "curr_entry->vme_start" in the
14749 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14750 * bytes of the submap.
14751 * "curr_offset" always represents the offset of a virtual
14752 * address in the curr_map relative to the absolute address
14753 * space (i.e. the top-level VM map).
14754 */
14755 curr_offset +=
14756 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14757 curr_address = user_address + curr_offset;
14758 /* switch to the submap */
14759 curr_map = VME_SUBMAP(curr_entry);
14760 curr_depth++;
14761 curr_entry = NULL;
14762 }
14763
14764 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14765 // so probably should be a real 32b ID vs. ptr.
14766 // Current users just check for equality
14767
14768 if (curr_entry == NULL) {
14769 /* no VM region contains the address... */
14770
14771 if (do_region_footprint && /* we want footprint numbers */
14772 next_entry == NULL && /* & there are no more regions */
14773 /* & we haven't already provided our fake region: */
14774 user_address <= vm_map_last_entry(map)->vme_end) {
14775 ledger_amount_t ledger_resident, ledger_compressed;
14776
14777 /*
14778 * Add a fake memory region to account for
14779 * purgeable and/or ledger-tagged memory that
14780 * counts towards this task's memory footprint,
14781 * i.e. the resident/compressed pages of non-volatile
14782 * objects owned by that task.
14783 */
14784 task_ledgers_footprint(map->pmap->ledger,
14785 &ledger_resident,
14786 &ledger_compressed);
14787 if (ledger_resident + ledger_compressed == 0) {
14788 /* no purgeable memory usage to report */
14789 return KERN_INVALID_ADDRESS;
14790 }
14791 /* fake region to show nonvolatile footprint */
14792 if (look_for_pages) {
14793 submap_info->protection = VM_PROT_DEFAULT;
14794 submap_info->max_protection = VM_PROT_DEFAULT;
14795 submap_info->inheritance = VM_INHERIT_DEFAULT;
14796 submap_info->offset = 0;
14797 submap_info->user_tag = -1;
14798 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14799 submap_info->pages_shared_now_private = 0;
14800 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14801 submap_info->pages_dirtied = submap_info->pages_resident;
14802 submap_info->ref_count = 1;
14803 submap_info->shadow_depth = 0;
14804 submap_info->external_pager = 0;
14805 submap_info->share_mode = SM_PRIVATE;
14806 if (submap_needed_copy) {
14807 submap_info->share_mode = SM_COW;
14808 }
14809 submap_info->is_submap = 0;
14810 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14811 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14812 submap_info->user_wired_count = 0;
14813 submap_info->pages_reusable = 0;
14814 } else {
14815 short_info->user_tag = -1;
14816 short_info->offset = 0;
14817 short_info->protection = VM_PROT_DEFAULT;
14818 short_info->inheritance = VM_INHERIT_DEFAULT;
14819 short_info->max_protection = VM_PROT_DEFAULT;
14820 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14821 short_info->user_wired_count = 0;
14822 short_info->is_submap = 0;
14823 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14824 short_info->external_pager = 0;
14825 short_info->shadow_depth = 0;
14826 short_info->share_mode = SM_PRIVATE;
14827 if (submap_needed_copy) {
14828 short_info->share_mode = SM_COW;
14829 }
14830 short_info->ref_count = 1;
14831 }
14832 *nesting_depth = 0;
14833 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14834 // *address = user_address;
14835 *address = vm_map_last_entry(map)->vme_end;
14836 return KERN_SUCCESS;
14837 }
14838
14839 if (next_entry == NULL) {
14840 /* ... and no VM region follows it either */
14841 return KERN_INVALID_ADDRESS;
14842 }
14843 /* ... gather info about the next VM region */
14844 curr_entry = next_entry;
14845 curr_map = next_map; /* still locked ... */
14846 curr_address = next_address;
14847 curr_skip = next_skip;
14848 curr_offset = next_offset;
14849 curr_depth = next_depth;
14850 curr_max_above = next_max_above;
14851 curr_max_below = next_max_below;
14852 } else {
14853 /* we won't need "next_entry" after all */
14854 if (next_entry != NULL) {
14855 /* release "next_map" */
14856 if (next_map != curr_map && not_in_kdp) {
14857 vm_map_unlock_read(next_map);
14858 }
14859 }
14860 }
14861 next_entry = NULL;
14862 next_map = NULL;
14863 next_offset = 0;
14864 next_skip = 0;
14865 next_depth = 0;
14866 next_max_below = -1;
14867 next_max_above = -1;
14868
14869 if (curr_entry->is_sub_map &&
14870 curr_depth < user_max_depth) {
14871 /*
14872 * We're not as deep as we could be: we must have
14873 * gone back up after not finding anything mapped
14874		 * below the original top-level map entry.
14875 * Let's move "curr_address" forward and recurse again.
14876 */
14877 user_address = curr_address;
14878 goto recurse_again;
14879 }
14880
14881 *nesting_depth = curr_depth;
14882 *size = curr_max_above + curr_max_below;
14883 *address = user_address + curr_skip - curr_max_below;
14884
14885 if (look_for_pages) {
14886 submap_info->user_tag = VME_ALIAS(curr_entry);
14887 submap_info->offset = VME_OFFSET(curr_entry);
14888 submap_info->protection = curr_entry->protection;
14889 submap_info->inheritance = curr_entry->inheritance;
14890 submap_info->max_protection = curr_entry->max_protection;
14891 submap_info->behavior = curr_entry->behavior;
14892 submap_info->user_wired_count = curr_entry->user_wired_count;
14893 submap_info->is_submap = curr_entry->is_sub_map;
14894 if (curr_entry->is_sub_map) {
14895 submap_info->object_id = VM_OBJECT_ID(VME_SUBMAP(curr_entry));
14896 } else {
14897 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14898 }
14899 } else {
14900 short_info->user_tag = VME_ALIAS(curr_entry);
14901 short_info->offset = VME_OFFSET(curr_entry);
14902 short_info->protection = curr_entry->protection;
14903 short_info->inheritance = curr_entry->inheritance;
14904 short_info->max_protection = curr_entry->max_protection;
14905 short_info->behavior = curr_entry->behavior;
14906 short_info->user_wired_count = curr_entry->user_wired_count;
14907 short_info->is_submap = curr_entry->is_sub_map;
14908 if (curr_entry->is_sub_map) {
14909 short_info->object_id = VM_OBJECT_ID(VME_SUBMAP(curr_entry));
14910 } else {
14911 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14912 }
14913 }
14914
14915 extended.pages_resident = 0;
14916 extended.pages_swapped_out = 0;
14917 extended.pages_shared_now_private = 0;
14918 extended.pages_dirtied = 0;
14919 extended.pages_reusable = 0;
14920 extended.external_pager = 0;
14921 extended.shadow_depth = 0;
14922 extended.share_mode = SM_EMPTY;
14923 extended.ref_count = 0;
14924
14925 if (not_in_kdp) {
14926 if (!curr_entry->is_sub_map) {
14927 vm_map_offset_t range_start, range_end;
14928 range_start = MAX((curr_address - curr_max_below),
14929 curr_entry->vme_start);
14930 range_end = MIN((curr_address + curr_max_above),
14931 curr_entry->vme_end);
14932 vm_map_region_walk(curr_map,
14933 range_start,
14934 curr_entry,
14935 (VME_OFFSET(curr_entry) +
14936 (range_start -
14937 curr_entry->vme_start)),
14938 range_end - range_start,
14939 &extended,
14940 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14941 if (extended.external_pager &&
14942 extended.ref_count == 2 &&
14943 extended.share_mode == SM_SHARED) {
14944 extended.share_mode = SM_PRIVATE;
14945 }
14946 if (submap_needed_copy) {
14947 extended.share_mode = SM_COW;
14948 }
14949 } else {
14950 if (curr_entry->use_pmap) {
14951 extended.share_mode = SM_TRUESHARED;
14952 } else {
14953 extended.share_mode = SM_PRIVATE;
14954 }
14955 extended.ref_count = os_ref_get_count_raw(&VME_SUBMAP(curr_entry)->map_refcnt);
14956 }
14957 }
14958
14959 if (look_for_pages) {
14960 submap_info->pages_resident = extended.pages_resident;
14961 submap_info->pages_swapped_out = extended.pages_swapped_out;
14962 submap_info->pages_shared_now_private =
14963 extended.pages_shared_now_private;
14964 submap_info->pages_dirtied = extended.pages_dirtied;
14965 submap_info->external_pager = extended.external_pager;
14966 submap_info->shadow_depth = extended.shadow_depth;
14967 submap_info->share_mode = extended.share_mode;
14968 submap_info->ref_count = extended.ref_count;
14969
14970 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14971 submap_info->pages_reusable = extended.pages_reusable;
14972 }
14973 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14974 if (curr_entry->is_sub_map) {
14975 submap_info->object_id_full = (vm_object_id_t)VM_KERNEL_ADDRPERM(VME_SUBMAP(curr_entry));
14976 } else if (VME_OBJECT(curr_entry)) {
14977 submap_info->object_id_full = (vm_object_id_t)VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry));
14978 } else {
14979 submap_info->object_id_full = 0ull;
14980 }
14981 }
14982 } else {
14983 short_info->external_pager = extended.external_pager;
14984 short_info->shadow_depth = extended.shadow_depth;
14985 short_info->share_mode = extended.share_mode;
14986 short_info->ref_count = extended.ref_count;
14987 }
14988
14989 if (not_in_kdp) {
14990 vm_map_unlock_read(curr_map);
14991 }
14992
14993 return KERN_SUCCESS;
14994 }
14995
14996 /*
14997 * vm_region:
14998 *
14999 * User call to obtain information about a region in
15000  *	a task's address map. Several info flavors are
15001  *	supported: basic (32- and 64-bit), extended and top.
15002 *
15003 * XXX The reserved and behavior fields cannot be filled
15004 * in until the vm merge from the IK is completed, and
15005 * vm_reserve is implemented.
15006 */
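/*
 * For orientation, the user-visible entry point is mach_vm_region().
 * A minimal sketch of a caller (user space, not kernel code), assuming
 * the usual Mach headers, might look like:
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
 *	    &count, &object_name);
 *
 * On success, "addr" and "size" describe the first region at or above
 * the requested address, and "info" carries its protections and offset.
 */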
15007
15008 kern_return_t
15009 vm_map_region(
15010 vm_map_t map,
15011 vm_map_offset_t *address, /* IN/OUT */
15012 vm_map_size_t *size, /* OUT */
15013 vm_region_flavor_t flavor, /* IN */
15014 vm_region_info_t info, /* OUT */
15015 mach_msg_type_number_t *count, /* IN/OUT */
15016 mach_port_t *object_name) /* OUT */
15017 {
15018 vm_map_entry_t tmp_entry;
15019 vm_map_entry_t entry;
15020 vm_map_offset_t start;
15021
15022 if (map == VM_MAP_NULL) {
15023 return KERN_INVALID_ARGUMENT;
15024 }
15025
15026 switch (flavor) {
15027 case VM_REGION_BASIC_INFO:
15028 /* legacy for old 32-bit objects info */
15029 {
15030 vm_region_basic_info_t basic;
15031
15032 if (*count < VM_REGION_BASIC_INFO_COUNT) {
15033 return KERN_INVALID_ARGUMENT;
15034 }
15035
15036 basic = (vm_region_basic_info_t) info;
15037 *count = VM_REGION_BASIC_INFO_COUNT;
15038
15039 vm_map_lock_read(map);
15040
15041 start = *address;
15042 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15043 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
15044 vm_map_unlock_read(map);
15045 return KERN_INVALID_ADDRESS;
15046 }
15047 } else {
15048 entry = tmp_entry;
15049 }
15050
15051 start = entry->vme_start;
15052
15053 basic->offset = (uint32_t)VME_OFFSET(entry);
15054 basic->protection = entry->protection;
15055 basic->inheritance = entry->inheritance;
15056 basic->max_protection = entry->max_protection;
15057 basic->behavior = entry->behavior;
15058 basic->user_wired_count = entry->user_wired_count;
15059 basic->reserved = entry->is_sub_map;
15060 *address = start;
15061 *size = (entry->vme_end - start);
15062
15063 if (object_name) {
15064 *object_name = IP_NULL;
15065 }
15066 if (entry->is_sub_map) {
15067 basic->shared = FALSE;
15068 } else {
15069 basic->shared = entry->is_shared;
15070 }
15071
15072 vm_map_unlock_read(map);
15073 return KERN_SUCCESS;
15074 }
15075
15076 case VM_REGION_BASIC_INFO_64:
15077 {
15078 vm_region_basic_info_64_t basic;
15079
15080 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
15081 return KERN_INVALID_ARGUMENT;
15082 }
15083
15084 basic = (vm_region_basic_info_64_t) info;
15085 *count = VM_REGION_BASIC_INFO_COUNT_64;
15086
15087 vm_map_lock_read(map);
15088
15089 start = *address;
15090 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15091 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
15092 vm_map_unlock_read(map);
15093 return KERN_INVALID_ADDRESS;
15094 }
15095 } else {
15096 entry = tmp_entry;
15097 }
15098
15099 start = entry->vme_start;
15100
15101 basic->offset = VME_OFFSET(entry);
15102 basic->protection = entry->protection;
15103 basic->inheritance = entry->inheritance;
15104 basic->max_protection = entry->max_protection;
15105 basic->behavior = entry->behavior;
15106 basic->user_wired_count = entry->user_wired_count;
15107 basic->reserved = entry->is_sub_map;
15108 *address = start;
15109 *size = (entry->vme_end - start);
15110
15111 if (object_name) {
15112 *object_name = IP_NULL;
15113 }
15114 if (entry->is_sub_map) {
15115 basic->shared = FALSE;
15116 } else {
15117 basic->shared = entry->is_shared;
15118 }
15119
15120 vm_map_unlock_read(map);
15121 return KERN_SUCCESS;
15122 }
15123 case VM_REGION_EXTENDED_INFO:
15124 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
15125 return KERN_INVALID_ARGUMENT;
15126 }
15127 OS_FALLTHROUGH;
15128 case VM_REGION_EXTENDED_INFO__legacy:
15129 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
15130 return KERN_INVALID_ARGUMENT;
15131 }
15132
15133 {
15134 vm_region_extended_info_t extended;
15135 mach_msg_type_number_t original_count;
15136 int effective_page_size, effective_page_shift;
15137
15138 extended = (vm_region_extended_info_t) info;
15139
15140 effective_page_shift = vm_self_region_page_shift(map);
15141 effective_page_size = (1 << effective_page_shift);
15142
15143 vm_map_lock_read(map);
15144
15145 start = *address;
15146 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15147 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
15148 vm_map_unlock_read(map);
15149 return KERN_INVALID_ADDRESS;
15150 }
15151 } else {
15152 entry = tmp_entry;
15153 }
15154 start = entry->vme_start;
15155
15156 extended->protection = entry->protection;
15157 extended->user_tag = VME_ALIAS(entry);
15158 extended->pages_resident = 0;
15159 extended->pages_swapped_out = 0;
15160 extended->pages_shared_now_private = 0;
15161 extended->pages_dirtied = 0;
15162 extended->external_pager = 0;
15163 extended->shadow_depth = 0;
15164
15165 original_count = *count;
15166 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
15167 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
15168 } else {
15169 extended->pages_reusable = 0;
15170 *count = VM_REGION_EXTENDED_INFO_COUNT;
15171 }
15172
15173 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
15174
15175 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
15176 extended->share_mode = SM_PRIVATE;
15177 }
15178
15179 if (object_name) {
15180 *object_name = IP_NULL;
15181 }
15182 *address = start;
15183 *size = (entry->vme_end - start);
15184
15185 vm_map_unlock_read(map);
15186 return KERN_SUCCESS;
15187 }
15188 case VM_REGION_TOP_INFO:
15189 {
15190 vm_region_top_info_t top;
15191
15192 if (*count < VM_REGION_TOP_INFO_COUNT) {
15193 return KERN_INVALID_ARGUMENT;
15194 }
15195
15196 top = (vm_region_top_info_t) info;
15197 *count = VM_REGION_TOP_INFO_COUNT;
15198
15199 vm_map_lock_read(map);
15200
15201 start = *address;
15202 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15203 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
15204 vm_map_unlock_read(map);
15205 return KERN_INVALID_ADDRESS;
15206 }
15207 } else {
15208 entry = tmp_entry;
15209 }
15210 start = entry->vme_start;
15211
15212 top->private_pages_resident = 0;
15213 top->shared_pages_resident = 0;
15214
15215 vm_map_region_top_walk(entry, top);
15216
15217 if (object_name) {
15218 *object_name = IP_NULL;
15219 }
15220 *address = start;
15221 *size = (entry->vme_end - start);
15222
15223 vm_map_unlock_read(map);
15224 return KERN_SUCCESS;
15225 }
15226 default:
15227 return KERN_INVALID_ARGUMENT;
15228 }
15229 }
15230
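/*
 * OBJ_RESIDENT_COUNT(obj, entry_size) estimates how many of an entry's
 * pages are resident in "obj": normally the object's resident pages
 * minus those already marked reusable, but when the whole object is
 * "all_reusable" only its wired pages still count.  The result is
 * clamped to the entry's own size (in pages) so that a large shared
 * object mapped through a small entry is not over-counted.
 */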
15231 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
15232 MIN((entry_size), \
15233 ((obj)->all_reusable ? \
15234 (obj)->wired_page_count : \
15235 (obj)->resident_page_count - (obj)->reusable_page_count))
15236
15237 void
15238 vm_map_region_top_walk(
15239 vm_map_entry_t entry,
15240 vm_region_top_info_t top)
15241 {
15242 if (entry->is_sub_map || VME_OBJECT(entry) == 0) {
15243 top->share_mode = SM_EMPTY;
15244 top->ref_count = 0;
15245 top->obj_id = 0;
15246 return;
15247 }
15248
15249 {
15250 struct vm_object *obj, *tmp_obj;
15251 int ref_count;
15252 uint32_t entry_size;
15253
15254 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
15255
15256 obj = VME_OBJECT(entry);
15257
15258 vm_object_lock(obj);
15259
15260 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15261 ref_count--;
15262 }
15263
15264 assert(obj->reusable_page_count <= obj->resident_page_count);
15265 if (obj->shadow) {
15266 if (ref_count == 1) {
15267 top->private_pages_resident =
15268 OBJ_RESIDENT_COUNT(obj, entry_size);
15269 } else {
15270 top->shared_pages_resident =
15271 OBJ_RESIDENT_COUNT(obj, entry_size);
15272 }
15273 top->ref_count = ref_count;
15274 top->share_mode = SM_COW;
15275
15276 while ((tmp_obj = obj->shadow)) {
15277 vm_object_lock(tmp_obj);
15278 vm_object_unlock(obj);
15279 obj = tmp_obj;
15280
15281 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15282 ref_count--;
15283 }
15284
15285 assert(obj->reusable_page_count <= obj->resident_page_count);
15286 top->shared_pages_resident +=
15287 OBJ_RESIDENT_COUNT(obj, entry_size);
15288 top->ref_count += ref_count - 1;
15289 }
15290 } else {
15291 if (entry->superpage_size) {
15292 top->share_mode = SM_LARGE_PAGE;
15293 top->shared_pages_resident = 0;
15294 top->private_pages_resident = entry_size;
15295 } else if (entry->needs_copy) {
15296 top->share_mode = SM_COW;
15297 top->shared_pages_resident =
15298 OBJ_RESIDENT_COUNT(obj, entry_size);
15299 } else {
15300 if (ref_count == 1 ||
15301 (ref_count == 2 && obj->named)) {
15302 top->share_mode = SM_PRIVATE;
15303 top->private_pages_resident =
15304 OBJ_RESIDENT_COUNT(obj,
15305 entry_size);
15306 } else {
15307 top->share_mode = SM_SHARED;
15308 top->shared_pages_resident =
15309 OBJ_RESIDENT_COUNT(obj,
15310 entry_size);
15311 }
15312 }
15313 top->ref_count = ref_count;
15314 }
15315 /* XXX K64: obj_id will be truncated */
15316 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
15317
15318 vm_object_unlock(obj);
15319 }
15320 }
15321
15322 void
15323 vm_map_region_walk(
15324 vm_map_t map,
15325 vm_map_offset_t va,
15326 vm_map_entry_t entry,
15327 vm_object_offset_t offset,
15328 vm_object_size_t range,
15329 vm_region_extended_info_t extended,
15330 boolean_t look_for_pages,
15331 mach_msg_type_number_t count)
15332 {
15333 struct vm_object *obj, *tmp_obj;
15334 vm_map_offset_t last_offset;
15335 int i;
15336 int ref_count;
15337 struct vm_object *shadow_object;
15338 unsigned short shadow_depth;
15339 boolean_t do_region_footprint;
15340 int effective_page_size, effective_page_shift;
15341 vm_map_offset_t effective_page_mask;
15342
15343 do_region_footprint = task_self_region_footprint();
15344
15345 if ((entry->is_sub_map) ||
15346 (VME_OBJECT(entry) == 0) ||
15347 (VME_OBJECT(entry)->phys_contiguous &&
15348 !entry->superpage_size)) {
15349 extended->share_mode = SM_EMPTY;
15350 extended->ref_count = 0;
15351 return;
15352 }
15353
15354 if (entry->superpage_size) {
15355 extended->shadow_depth = 0;
15356 extended->share_mode = SM_LARGE_PAGE;
15357 extended->ref_count = 1;
15358 extended->external_pager = 0;
15359
15360 /* TODO4K: Superpage in 4k mode? */
15361 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
15362 extended->shadow_depth = 0;
15363 return;
15364 }
15365
15366 effective_page_shift = vm_self_region_page_shift(map);
15367 effective_page_size = (1 << effective_page_shift);
15368 effective_page_mask = effective_page_size - 1;
15369
15370 offset = vm_map_trunc_page(offset, effective_page_mask);
15371
15372 obj = VME_OBJECT(entry);
15373
15374 vm_object_lock(obj);
15375
15376 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15377 ref_count--;
15378 }
15379
15380 if (look_for_pages) {
15381 for (last_offset = offset + range;
15382 offset < last_offset;
15383 offset += effective_page_size, va += effective_page_size) {
15384 if (do_region_footprint) {
15385 int disp;
15386
15387 disp = 0;
15388 if (map->has_corpse_footprint) {
15389 /*
15390 * Query the page info data we saved
15391 * while forking the corpse.
15392 */
15393 vm_map_corpse_footprint_query_page_info(
15394 map,
15395 va,
15396 &disp);
15397 } else {
15398 /*
15399 * Query the pmap.
15400 */
15401 vm_map_footprint_query_page_info(
15402 map,
15403 entry,
15404 va,
15405 &disp);
15406 }
15407 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
15408 extended->pages_resident++;
15409 }
15410 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
15411 extended->pages_reusable++;
15412 }
15413 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
15414 extended->pages_dirtied++;
15415 }
15416 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
15417 extended->pages_swapped_out++;
15418 }
15419 continue;
15420 }
15421
15422 vm_map_region_look_for_page(map, va, obj,
15423 vm_object_trunc_page(offset), ref_count,
15424 0, extended, count);
15425 }
15426
15427 if (do_region_footprint) {
15428 goto collect_object_info;
15429 }
15430 } else {
15431 collect_object_info:
15432 shadow_object = obj->shadow;
15433 shadow_depth = 0;
15434
15435 if (!(obj->internal)) {
15436 extended->external_pager = 1;
15437 }
15438
15439 if (shadow_object != VM_OBJECT_NULL) {
15440 vm_object_lock(shadow_object);
15441 for (;
15442 shadow_object != VM_OBJECT_NULL;
15443 shadow_depth++) {
15444 vm_object_t next_shadow;
15445
15446 if (!(shadow_object->internal)) {
15447 extended->external_pager = 1;
15448 }
15449
15450 next_shadow = shadow_object->shadow;
15451 if (next_shadow) {
15452 vm_object_lock(next_shadow);
15453 }
15454 vm_object_unlock(shadow_object);
15455 shadow_object = next_shadow;
15456 }
15457 }
15458 extended->shadow_depth = shadow_depth;
15459 }
15460
15461 if (extended->shadow_depth || entry->needs_copy) {
15462 extended->share_mode = SM_COW;
15463 } else {
15464 if (ref_count == 1) {
15465 extended->share_mode = SM_PRIVATE;
15466 } else {
15467 if (obj->true_share) {
15468 extended->share_mode = SM_TRUESHARED;
15469 } else {
15470 extended->share_mode = SM_SHARED;
15471 }
15472 }
15473 }
15474 extended->ref_count = ref_count - extended->shadow_depth;
15475
15476 for (i = 0; i < extended->shadow_depth; i++) {
15477 if ((tmp_obj = obj->shadow) == 0) {
15478 break;
15479 }
15480 vm_object_lock(tmp_obj);
15481 vm_object_unlock(obj);
15482
15483 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
15484 ref_count--;
15485 }
15486
15487 extended->ref_count += ref_count;
15488 obj = tmp_obj;
15489 }
15490 vm_object_unlock(obj);
15491
15492 if (extended->share_mode == SM_SHARED) {
15493 vm_map_entry_t cur;
15494 vm_map_entry_t last;
15495 int my_refs;
15496
15497 obj = VME_OBJECT(entry);
15498 last = vm_map_to_entry(map);
15499 my_refs = 0;
15500
15501 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15502 ref_count--;
15503 }
15504 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
15505 my_refs += vm_map_region_count_obj_refs(cur, obj);
15506 }
15507
15508 if (my_refs == ref_count) {
15509 extended->share_mode = SM_PRIVATE_ALIASED;
15510 } else if (my_refs > 1) {
15511 extended->share_mode = SM_SHARED_ALIASED;
15512 }
15513 }
15514 }
15515
15516
15517 /* object is locked on entry and locked on return */
15518
15519
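/*
 * Walk the shadow chain starting at "object", looking for the page at
 * "offset": count it as resident (and possibly dirtied or reusable) if
 * found, or as swapped out if only the compressor pager has it, and
 * record how deep the walk had to go so the caller can report the
 * maximum shadow depth.
 */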
15520 static void
15521 vm_map_region_look_for_page(
15522 __unused vm_map_t map,
15523 __unused vm_map_offset_t va,
15524 vm_object_t object,
15525 vm_object_offset_t offset,
15526 int max_refcnt,
15527 unsigned short depth,
15528 vm_region_extended_info_t extended,
15529 mach_msg_type_number_t count)
15530 {
15531 vm_page_t p;
15532 vm_object_t shadow;
15533 int ref_count;
15534 vm_object_t caller_object;
15535
15536 shadow = object->shadow;
15537 caller_object = object;
15538
15539
15540 while (TRUE) {
15541 if (!(object->internal)) {
15542 extended->external_pager = 1;
15543 }
15544
15545 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15546 if (shadow && (max_refcnt == 1)) {
15547 extended->pages_shared_now_private++;
15548 }
15549
15550 if (!p->vmp_fictitious &&
15551 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15552 extended->pages_dirtied++;
15553 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15554 if (p->vmp_reusable || object->all_reusable) {
15555 extended->pages_reusable++;
15556 }
15557 }
15558
15559 extended->pages_resident++;
15560
15561 if (object != caller_object) {
15562 vm_object_unlock(object);
15563 }
15564
15565 return;
15566 }
15567 if (object->internal &&
15568 object->alive &&
15569 !object->terminating &&
15570 object->pager_ready) {
15571 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15572 == VM_EXTERNAL_STATE_EXISTS) {
15573 /* the pager has that page */
15574 extended->pages_swapped_out++;
15575 if (object != caller_object) {
15576 vm_object_unlock(object);
15577 }
15578 return;
15579 }
15580 }
15581
15582 if (shadow) {
15583 vm_object_lock(shadow);
15584
15585 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15586 ref_count--;
15587 }
15588
15589 if (++depth > extended->shadow_depth) {
15590 extended->shadow_depth = depth;
15591 }
15592
15593 if (ref_count > max_refcnt) {
15594 max_refcnt = ref_count;
15595 }
15596
15597 if (object != caller_object) {
15598 vm_object_unlock(object);
15599 }
15600
15601 offset = offset + object->vo_shadow_offset;
15602 object = shadow;
15603 shadow = object->shadow;
15604 continue;
15605 }
15606 if (object != caller_object) {
15607 vm_object_unlock(object);
15608 }
15609 break;
15610 }
15611 }
15612
15613 static int
15614 vm_map_region_count_obj_refs(
15615 vm_map_entry_t entry,
15616 vm_object_t object)
15617 {
15618 int ref_count;
15619 vm_object_t chk_obj;
15620 vm_object_t tmp_obj;
15621
15622 if (entry->is_sub_map || VME_OBJECT(entry) == VM_OBJECT_NULL) {
15623 return 0;
15624 }
15625
15626 ref_count = 0;
15627 chk_obj = VME_OBJECT(entry);
15628 vm_object_lock(chk_obj);
15629
15630 while (chk_obj) {
15631 if (chk_obj == object) {
15632 ref_count++;
15633 }
15634 tmp_obj = chk_obj->shadow;
15635 if (tmp_obj) {
15636 vm_object_lock(tmp_obj);
15637 }
15638 vm_object_unlock(chk_obj);
15639
15640 chk_obj = tmp_obj;
15641 }
15642
15643 return ref_count;
15644 }
15645
15646
15647 /*
15648 * Routine: vm_map_simplify
15649 *
15650 * Description:
15651 * Attempt to simplify the map representation in
15652 * the vicinity of the given starting address.
15653 * Note:
15654 * This routine is intended primarily to keep the
15655 * kernel maps more compact -- they generally don't
15656 * benefit from the "expand a map entry" technology
15657 * at allocation time because the adjacent entry
15658 * is often wired down.
15659 */
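/*
 * Illustrative sketch (not part of the implementation): two adjacent
 * entries [A, B) and [B, C) backed by the same object coalesce into a
 * single entry [A, C) when the second entry's offset picks up exactly
 * where the first one left off and every other attribute that matters
 * (protections, inheritance, wiring, aliases, permanence, ...) is
 * identical; any mismatch below leaves the entries untouched.
 */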
15660 void
15661 vm_map_simplify_entry(
15662 vm_map_t map,
15663 vm_map_entry_t this_entry)
15664 {
15665 vm_map_entry_t prev_entry;
15666
15667 prev_entry = this_entry->vme_prev;
15668
15669 if ((this_entry != vm_map_to_entry(map)) &&
15670 (prev_entry != vm_map_to_entry(map)) &&
15671
15672 (prev_entry->vme_end == this_entry->vme_start) &&
15673
15674 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15675 (prev_entry->vme_object_value == this_entry->vme_object_value) &&
15676 (prev_entry->vme_kernel_object == this_entry->vme_kernel_object) &&
15677 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15678 prev_entry->vme_start))
15679 == VME_OFFSET(this_entry)) &&
15680
15681 (prev_entry->behavior == this_entry->behavior) &&
15682 (prev_entry->needs_copy == this_entry->needs_copy) &&
15683 (prev_entry->protection == this_entry->protection) &&
15684 (prev_entry->max_protection == this_entry->max_protection) &&
15685 (prev_entry->inheritance == this_entry->inheritance) &&
15686 (prev_entry->use_pmap == this_entry->use_pmap) &&
15687 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15688 (prev_entry->no_cache == this_entry->no_cache) &&
15689 (prev_entry->vme_permanent == this_entry->vme_permanent) &&
15690 (prev_entry->map_aligned == this_entry->map_aligned) &&
15691 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15692 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15693 #if __arm64e__
15694 (prev_entry->used_for_tpro == this_entry->used_for_tpro) &&
15695 #endif
15696 (prev_entry->csm_associated == this_entry->csm_associated) &&
15697 (prev_entry->vme_xnu_user_debug == this_entry->vme_xnu_user_debug) &&
15698 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15699 (prev_entry->vme_resilient_codesign ==
15700 this_entry->vme_resilient_codesign) &&
15701 (prev_entry->vme_resilient_media ==
15702 this_entry->vme_resilient_media) &&
15703 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15704 (prev_entry->translated_allow_execute == this_entry->translated_allow_execute) &&
15705
15706 (prev_entry->wired_count == this_entry->wired_count) &&
15707 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15708
15709 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15710 (prev_entry->in_transition == FALSE) &&
15711 (this_entry->in_transition == FALSE) &&
15712 (prev_entry->needs_wakeup == FALSE) &&
15713 (this_entry->needs_wakeup == FALSE) &&
15714 (prev_entry->is_shared == this_entry->is_shared) &&
15715 (prev_entry->superpage_size == FALSE) &&
15716 (this_entry->superpage_size == FALSE)
15717 ) {
15718 if (prev_entry->vme_permanent) {
15719 assert(this_entry->vme_permanent);
15720 prev_entry->vme_permanent = false;
15721 }
15722 vm_map_store_entry_unlink(map, prev_entry, true);
15723 assert(prev_entry->vme_start < this_entry->vme_end);
15724 if (prev_entry->map_aligned) {
15725 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15726 VM_MAP_PAGE_MASK(map)));
15727 }
15728 this_entry->vme_start = prev_entry->vme_start;
15729 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15730
15731 if (map->holelistenabled) {
15732 vm_map_store_update_first_free(map, this_entry, TRUE);
15733 }
15734
15735 if (prev_entry->is_sub_map) {
15736 vm_map_deallocate(VME_SUBMAP(prev_entry));
15737 } else {
15738 vm_object_deallocate(VME_OBJECT(prev_entry));
15739 }
15740 vm_map_entry_dispose(prev_entry);
15741 SAVE_HINT_MAP_WRITE(map, this_entry);
15742 }
15743 }
15744
15745 void
15746 vm_map_simplify(
15747 vm_map_t map,
15748 vm_map_offset_t start)
15749 {
15750 vm_map_entry_t this_entry;
15751
15752 vm_map_lock(map);
15753 if (vm_map_lookup_entry(map, start, &this_entry)) {
15754 vm_map_simplify_entry(map, this_entry);
15755 vm_map_simplify_entry(map, this_entry->vme_next);
15756 }
15757 vm_map_unlock(map);
15758 }
15759
15760 static void
15761 vm_map_simplify_range(
15762 vm_map_t map,
15763 vm_map_offset_t start,
15764 vm_map_offset_t end)
15765 {
15766 vm_map_entry_t entry;
15767
15768 /*
15769 * The map should be locked (for "write") by the caller.
15770 */
15771
15772 if (start >= end) {
15773 /* invalid address range */
15774 return;
15775 }
15776
15777 start = vm_map_trunc_page(start,
15778 VM_MAP_PAGE_MASK(map));
15779 end = vm_map_round_page(end,
15780 VM_MAP_PAGE_MASK(map));
15781
15782 if (!vm_map_lookup_entry(map, start, &entry)) {
15783 /* "start" is not mapped and "entry" ends before "start" */
15784 if (entry == vm_map_to_entry(map)) {
15785 /* start with first entry in the map */
15786 entry = vm_map_first_entry(map);
15787 } else {
15788 /* start with next entry */
15789 entry = entry->vme_next;
15790 }
15791 }
15792
15793 while (entry != vm_map_to_entry(map) &&
15794 entry->vme_start <= end) {
15795 /* try and coalesce "entry" with its previous entry */
15796 vm_map_simplify_entry(map, entry);
15797 entry = entry->vme_next;
15798 }
15799 }
15800
15801
15802 /*
15803 * Routine: vm_map_machine_attribute
15804 * Purpose:
15805  *	Provide machine-specific attributes to mappings,
15806  *	such as cacheability, for machines that provide
15807  *	them.  NUMA architectures and machines with big/strange
15808  *	caches will use this.
15809  * Note:
15810  *	Responsibilities for locking and checking are handled here;
15811  *	everything else is left to the pmap module. If any non-volatile
15812 * information must be kept, the pmap module should handle
15813 * it itself. [This assumes that attributes do not
15814 * need to be inherited, which seems ok to me]
15815 */
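/*
 * Typical use, as a sketch: flush the data cache for a range that was
 * just written before it gets executed, on machines that need it.
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kr = vm_map_machine_attribute(map, start, end, MATTR_CACHE, &val);
 *
 * Only MATTR_CACHE is handled by walking the map below; any other
 * attribute is passed straight through to pmap_attribute().
 */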
15816 kern_return_t
15817 vm_map_machine_attribute(
15818 vm_map_t map,
15819 vm_map_offset_t start,
15820 vm_map_offset_t end,
15821 vm_machine_attribute_t attribute,
15822 vm_machine_attribute_val_t* value) /* IN/OUT */
15823 {
15824 kern_return_t ret;
15825 vm_map_size_t sync_size;
15826 vm_map_entry_t entry;
15827
15828 if (start < vm_map_min(map) || end > vm_map_max(map)) {
15829 return KERN_INVALID_ADDRESS;
15830 }
15831 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
15832 return KERN_INVALID_ADDRESS;
15833 }
15834
15835 /* Figure how much memory we need to flush (in page increments) */
15836 sync_size = end - start;
15837
15838 vm_map_lock(map);
15839
15840 if (attribute != MATTR_CACHE) {
15841 /* If we don't have to find physical addresses, we */
15842 /* don't have to do an explicit traversal here. */
15843 ret = pmap_attribute(map->pmap, start, end - start,
15844 attribute, value);
15845 vm_map_unlock(map);
15846 return ret;
15847 }
15848
15849 ret = KERN_SUCCESS; /* Assume it all worked */
15850
15851 while (sync_size) {
15852 if (vm_map_lookup_entry(map, start, &entry)) {
15853 vm_map_size_t sub_size;
15854 if ((entry->vme_end - start) > sync_size) {
15855 sub_size = sync_size;
15856 sync_size = 0;
15857 } else {
15858 sub_size = entry->vme_end - start;
15859 sync_size -= sub_size;
15860 }
15861 if (entry->is_sub_map) {
15862 vm_map_offset_t sub_start;
15863 vm_map_offset_t sub_end;
15864
15865 sub_start = (start - entry->vme_start)
15866 + VME_OFFSET(entry);
15867 sub_end = sub_start + sub_size;
15868 vm_map_machine_attribute(
15869 VME_SUBMAP(entry),
15870 sub_start,
15871 sub_end,
15872 attribute, value);
15873 } else if (VME_OBJECT(entry)) {
15874 vm_page_t m;
15875 vm_object_t object;
15876 vm_object_t base_object;
15877 vm_object_t last_object;
15878 vm_object_offset_t offset;
15879 vm_object_offset_t base_offset;
15880 vm_map_size_t range;
15881 range = sub_size;
15882 offset = (start - entry->vme_start)
15883 + VME_OFFSET(entry);
15884 offset = vm_object_trunc_page(offset);
15885 base_offset = offset;
15886 object = VME_OBJECT(entry);
15887 base_object = object;
15888 last_object = NULL;
15889
15890 vm_object_lock(object);
15891
15892 while (range) {
15893 m = vm_page_lookup(
15894 object, offset);
15895
15896 if (m && !m->vmp_fictitious) {
15897 ret =
15898 pmap_attribute_cache_sync(
15899 VM_PAGE_GET_PHYS_PAGE(m),
15900 PAGE_SIZE,
15901 attribute, value);
15902 } else if (object->shadow) {
15903 offset = offset + object->vo_shadow_offset;
15904 last_object = object;
15905 object = object->shadow;
15906 vm_object_lock(last_object->shadow);
15907 vm_object_unlock(last_object);
15908 continue;
15909 }
15910 if (range < PAGE_SIZE) {
15911 range = 0;
15912 } else {
15913 range -= PAGE_SIZE;
15914 }
15915
15916 if (base_object != object) {
15917 vm_object_unlock(object);
15918 vm_object_lock(base_object);
15919 object = base_object;
15920 }
15921 /* Bump to the next page */
15922 base_offset += PAGE_SIZE;
15923 offset = base_offset;
15924 }
15925 vm_object_unlock(object);
15926 }
15927 start += sub_size;
15928 } else {
15929 vm_map_unlock(map);
15930 return KERN_FAILURE;
15931 }
15932 }
15933
15934 vm_map_unlock(map);
15935
15936 return ret;
15937 }
15938
15939 /*
15940 * vm_map_behavior_set:
15941 *
15942 * Sets the paging reference behavior of the specified address
15943 * range in the target map. Paging reference behavior affects
15944 * how pagein operations resulting from faults on the map will be
15945 * clustered.
15946 */
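/*
 * A minimal usage sketch, assuming the usual BSD madvise() front end:
 * madvise(addr, len, MADV_SEQUENTIAL) reaches this routine (via
 * mach_vm_behavior_set()) roughly as
 *
 *	vm_map_behavior_set(map, start, start + size,
 *	    VM_BEHAVIOR_SEQUENTIAL);
 *
 * The first group of behaviors below only records the hint in the
 * affected map entries; the later ones (WILLNEED, DONTNEED, FREE,
 * REUSABLE, ...) take immediate action instead.
 */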
15947 kern_return_t
15948 vm_map_behavior_set(
15949 vm_map_t map,
15950 vm_map_offset_t start,
15951 vm_map_offset_t end,
15952 vm_behavior_t new_behavior)
15953 {
15954 vm_map_entry_t entry;
15955 vm_map_entry_t temp_entry;
15956
15957 if (start > end ||
15958 start < vm_map_min(map) ||
15959 end > vm_map_max(map)) {
15960 return KERN_NO_SPACE;
15961 }
15962 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
15963 return KERN_INVALID_ADDRESS;
15964 }
15965
15966 switch (new_behavior) {
15967 /*
15968 	 * The behaviors in this first block all set a persistent state on the specified
15969 * memory range. All we have to do here is to record the desired behavior
15970 * in the vm_map_entry_t's.
15971 */
15972
15973 case VM_BEHAVIOR_DEFAULT:
15974 case VM_BEHAVIOR_RANDOM:
15975 case VM_BEHAVIOR_SEQUENTIAL:
15976 case VM_BEHAVIOR_RSEQNTL:
15977 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15978 vm_map_lock(map);
15979
15980 /*
15981 * The entire address range must be valid for the map.
15982 * Note that vm_map_range_check() does a
15983 * vm_map_lookup_entry() internally and returns the
15984 * entry containing the start of the address range if
15985 * the entire range is valid.
15986 */
15987 if (vm_map_range_check(map, start, end, &temp_entry)) {
15988 entry = temp_entry;
15989 vm_map_clip_start(map, entry, start);
15990 } else {
15991 vm_map_unlock(map);
15992 return KERN_INVALID_ADDRESS;
15993 }
15994
15995 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15996 vm_map_clip_end(map, entry, end);
15997 if (entry->is_sub_map) {
15998 assert(!entry->use_pmap);
15999 }
16000
16001 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
16002 entry->zero_wired_pages = TRUE;
16003 } else {
16004 entry->behavior = new_behavior;
16005 }
16006 entry = entry->vme_next;
16007 }
16008
16009 vm_map_unlock(map);
16010 break;
16011
16012 /*
16013 * The rest of these are different from the above in that they cause
16014 * an immediate action to take place as opposed to setting a behavior that
16015 * affects future actions.
16016 */
16017
16018 case VM_BEHAVIOR_WILLNEED:
16019 return vm_map_willneed(map, start, end);
16020
16021 case VM_BEHAVIOR_DONTNEED:
16022 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
16023
16024 case VM_BEHAVIOR_FREE:
16025 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
16026
16027 case VM_BEHAVIOR_REUSABLE:
16028 return vm_map_reusable_pages(map, start, end);
16029
16030 case VM_BEHAVIOR_REUSE:
16031 return vm_map_reuse_pages(map, start, end);
16032
16033 case VM_BEHAVIOR_CAN_REUSE:
16034 return vm_map_can_reuse(map, start, end);
16035
16036 #if MACH_ASSERT
16037 case VM_BEHAVIOR_PAGEOUT:
16038 return vm_map_pageout(map, start, end);
16039 #endif /* MACH_ASSERT */
16040
16041 default:
16042 return KERN_INVALID_ARGUMENT;
16043 }
16044
16045 return KERN_SUCCESS;
16046 }
16047
16048
16049 /*
16050 * Internals for madvise(MADV_WILLNEED) system call.
16051 *
16052  * The implementation is to:
16053  * a) read ahead if the mapping corresponds to a mapped regular file
16054  * b) or fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping
16055 */
16056
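/*
 * For reference, the user-level request that lands here is simply
 * (a user-space sketch, not kernel code):
 *
 *	void *buf = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
 *	madvise(buf, len, MADV_WILLNEED);	// hint: start paging this in now
 *
 * The advice is best-effort: if the I/O below fails for any reason we
 * still return KERN_SUCCESS to the caller.
 */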
16057
16058 static kern_return_t
16059 vm_map_willneed(
16060 vm_map_t map,
16061 vm_map_offset_t start,
16062 vm_map_offset_t end
16063 )
16064 {
16065 vm_map_entry_t entry;
16066 vm_object_t object;
16067 memory_object_t pager;
16068 struct vm_object_fault_info fault_info = {};
16069 kern_return_t kr;
16070 vm_object_size_t len;
16071 vm_object_offset_t offset;
16072
16073 fault_info.interruptible = THREAD_UNINT; /* ignored value */
16074 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
16075 fault_info.stealth = TRUE;
16076
16077 /*
16078 * The MADV_WILLNEED operation doesn't require any changes to the
16079 * vm_map_entry_t's, so the read lock is sufficient.
16080 */
16081
16082 vm_map_lock_read(map);
16083
16084 /*
16085 * The madvise semantics require that the address range be fully
16086 * allocated with no holes. Otherwise, we're required to return
16087 * an error.
16088 */
16089
16090 if (!vm_map_range_check(map, start, end, &entry)) {
16091 vm_map_unlock_read(map);
16092 return KERN_INVALID_ADDRESS;
16093 }
16094
16095 /*
16096 * Examine each vm_map_entry_t in the range.
16097 */
16098 for (; entry != vm_map_to_entry(map) && start < end;) {
16099 /*
16100 * The first time through, the start address could be anywhere
16101 * within the vm_map_entry we found. So adjust the offset to
16102 * correspond. After that, the offset will always be zero to
16103 * correspond to the beginning of the current vm_map_entry.
16104 */
16105 offset = (start - entry->vme_start) + VME_OFFSET(entry);
16106
16107 /*
16108 * Set the length so we don't go beyond the end of the
16109 * map_entry or beyond the end of the range we were given.
16110 		 * This range could also span multiple map entries, all of which
16111 		 * map different files, so make sure we only do the right amount
16112 * of I/O for each object. Note that it's possible for there
16113 * to be multiple map entries all referring to the same object
16114 * but with different page permissions, but it's not worth
16115 * trying to optimize that case.
16116 */
16117 len = MIN(entry->vme_end - start, end - start);
16118
16119 if ((vm_size_t) len != len) {
16120 /* 32-bit overflow */
16121 len = (vm_size_t) (0 - PAGE_SIZE);
16122 }
16123 fault_info.cluster_size = (vm_size_t) len;
16124 fault_info.lo_offset = offset;
16125 fault_info.hi_offset = offset + len;
16126 fault_info.user_tag = VME_ALIAS(entry);
16127 fault_info.pmap_options = 0;
16128 if (entry->iokit_acct ||
16129 (!entry->is_sub_map && !entry->use_pmap)) {
16130 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
16131 }
16132 fault_info.fi_xnu_user_debug = entry->vme_xnu_user_debug;
16133
16134 /*
16135 * If the entry is a submap OR there's no read permission
16136 * to this mapping, then just skip it.
16137 */
16138 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
16139 entry = entry->vme_next;
16140 start = entry->vme_start;
16141 continue;
16142 }
16143
16144 object = VME_OBJECT(entry);
16145
16146 if (object == NULL ||
16147 (object && object->internal)) {
16148 /*
16149 * Memory range backed by anonymous memory.
16150 */
16151 vm_size_t region_size = 0, effective_page_size = 0;
16152 vm_map_offset_t addr = 0, effective_page_mask = 0;
16153
16154 region_size = len;
16155 addr = start;
16156
16157 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
16158 effective_page_size = effective_page_mask + 1;
16159
16160 vm_map_unlock_read(map);
16161
16162 while (region_size) {
16163 vm_pre_fault(
16164 vm_map_trunc_page(addr, effective_page_mask),
16165 VM_PROT_READ | VM_PROT_WRITE);
16166
16167 region_size -= effective_page_size;
16168 addr += effective_page_size;
16169 }
16170 } else {
16171 /*
16172 * Find the file object backing this map entry. If there is
16173 * none, then we simply ignore the "will need" advice for this
16174 * entry and go on to the next one.
16175 */
16176 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
16177 entry = entry->vme_next;
16178 start = entry->vme_start;
16179 continue;
16180 }
16181
16182 vm_object_paging_begin(object);
16183 pager = object->pager;
16184 vm_object_unlock(object);
16185
16186 /*
16187 * The data_request() could take a long time, so let's
16188 * release the map lock to avoid blocking other threads.
16189 */
16190 vm_map_unlock_read(map);
16191
16192 /*
16193 * Get the data from the object asynchronously.
16194 *
16195 * Note that memory_object_data_request() places limits on the
16196 * amount of I/O it will do. Regardless of the len we
16197 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
16198 * silently truncates the len to that size. This isn't
16199 * necessarily bad since madvise shouldn't really be used to
16200 * page in unlimited amounts of data. Other Unix variants
16201 * limit the willneed case as well. If this turns out to be an
16202 * issue for developers, then we can always adjust the policy
16203 * here and still be backwards compatible since this is all
16204 * just "advice".
16205 */
16206 kr = memory_object_data_request(
16207 pager,
16208 vm_object_trunc_page(offset) + object->paging_offset,
16209 0, /* ignored */
16210 VM_PROT_READ,
16211 (memory_object_fault_info_t)&fault_info);
16212
16213 vm_object_lock(object);
16214 vm_object_paging_end(object);
16215 vm_object_unlock(object);
16216
16217 /*
16218 * If we couldn't do the I/O for some reason, just give up on
16219 * the madvise. We still return success to the user since
16220 * madvise isn't supposed to fail when the advice can't be
16221 * taken.
16222 */
16223
16224 if (kr != KERN_SUCCESS) {
16225 return KERN_SUCCESS;
16226 }
16227 }
16228
16229 start += len;
16230 if (start >= end) {
16231 /* done */
16232 return KERN_SUCCESS;
16233 }
16234
16235 /* look up next entry */
16236 vm_map_lock_read(map);
16237 if (!vm_map_lookup_entry(map, start, &entry)) {
16238 /*
16239 * There's a new hole in the address range.
16240 */
16241 vm_map_unlock_read(map);
16242 return KERN_INVALID_ADDRESS;
16243 }
16244 }
16245
16246 vm_map_unlock_read(map);
16247 return KERN_SUCCESS;
16248 }
16249
16250 static boolean_t
16251 vm_map_entry_is_reusable(
16252 vm_map_entry_t entry)
16253 {
16254 /* Only user map entries */
16255
16256 vm_object_t object;
16257
16258 if (entry->is_sub_map) {
16259 return FALSE;
16260 }
16261
16262 switch (VME_ALIAS(entry)) {
16263 case VM_MEMORY_MALLOC:
16264 case VM_MEMORY_MALLOC_SMALL:
16265 case VM_MEMORY_MALLOC_LARGE:
16266 case VM_MEMORY_REALLOC:
16267 case VM_MEMORY_MALLOC_TINY:
16268 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
16269 case VM_MEMORY_MALLOC_LARGE_REUSED:
16270 /*
16271 * This is a malloc() memory region: check if it's still
16272 * in its original state and can be re-used for more
16273 * malloc() allocations.
16274 */
16275 break;
16276 default:
16277 /*
16278 * Not a malloc() memory region: let the caller decide if
16279 * it's re-usable.
16280 */
16281 return TRUE;
16282 }
16283
16284 if (/*entry->is_shared ||*/
16285 entry->is_sub_map ||
16286 entry->in_transition ||
16287 entry->protection != VM_PROT_DEFAULT ||
16288 entry->max_protection != VM_PROT_ALL ||
16289 entry->inheritance != VM_INHERIT_DEFAULT ||
16290 entry->no_cache ||
16291 entry->vme_permanent ||
16292 entry->superpage_size != FALSE ||
16293 entry->zero_wired_pages ||
16294 entry->wired_count != 0 ||
16295 entry->user_wired_count != 0) {
16296 return FALSE;
16297 }
16298
16299 object = VME_OBJECT(entry);
16300 if (object == VM_OBJECT_NULL) {
16301 return TRUE;
16302 }
16303 if (
16304 #if 0
16305 /*
16306 * Let's proceed even if the VM object is potentially
16307 * shared.
16308 * We check for this later when processing the actual
16309 * VM pages, so the contents will be safe if shared.
16310 *
16311 * But we can still mark this memory region as "reusable" to
16312 * acknowledge that the caller did let us know that the memory
16313 * could be re-used and should not be penalized for holding
16314 * on to it. This allows its "resident size" to not include
16315 * the reusable range.
16316 */
16317 object->ref_count == 1 &&
16318 #endif
16319 object->wired_page_count == 0 &&
16320 object->copy == VM_OBJECT_NULL &&
16321 object->shadow == VM_OBJECT_NULL &&
16322 object->internal &&
16323 object->purgable == VM_PURGABLE_DENY &&
16324 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
16325 !object->code_signed) {
16326 return TRUE;
16327 }
16328 return FALSE;
16329 }
16330
16331 static kern_return_t
16332 vm_map_reuse_pages(
16333 vm_map_t map,
16334 vm_map_offset_t start,
16335 vm_map_offset_t end)
16336 {
16337 vm_map_entry_t entry;
16338 vm_object_t object;
16339 vm_object_offset_t start_offset, end_offset;
16340
16341 /*
16342 * The MADV_REUSE operation doesn't require any changes to the
16343 * vm_map_entry_t's, so the read lock is sufficient.
16344 */
16345
16346 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16347 /*
16348 * XXX TODO4K
16349 * need to figure out what reusable means for a
16350 * portion of a native page.
16351 */
16352 return KERN_SUCCESS;
16353 }
16354
16355 vm_map_lock_read(map);
16356 assert(map->pmap != kernel_pmap); /* protect alias access */
16357
16358 /*
16359 * The madvise semantics require that the address range be fully
16360 * allocated with no holes. Otherwise, we're required to return
16361 * an error.
16362 */
16363
16364 if (!vm_map_range_check(map, start, end, &entry)) {
16365 vm_map_unlock_read(map);
16366 vm_page_stats_reusable.reuse_pages_failure++;
16367 return KERN_INVALID_ADDRESS;
16368 }
16369
16370 /*
16371 * Examine each vm_map_entry_t in the range.
16372 */
16373 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16374 entry = entry->vme_next) {
16375 /*
16376 * Sanity check on the VM map entry.
16377 */
16378 if (!vm_map_entry_is_reusable(entry)) {
16379 vm_map_unlock_read(map);
16380 vm_page_stats_reusable.reuse_pages_failure++;
16381 return KERN_INVALID_ADDRESS;
16382 }
16383
16384 /*
16385 * The first time through, the start address could be anywhere
16386 * within the vm_map_entry we found. So adjust the offset to
16387 * correspond.
16388 */
16389 if (entry->vme_start < start) {
16390 start_offset = start - entry->vme_start;
16391 } else {
16392 start_offset = 0;
16393 }
16394 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16395 start_offset += VME_OFFSET(entry);
16396 end_offset += VME_OFFSET(entry);
16397
16398 object = VME_OBJECT(entry);
16399 if (object != VM_OBJECT_NULL) {
16400 vm_object_lock(object);
16401 vm_object_reuse_pages(object, start_offset, end_offset,
16402 TRUE);
16403 vm_object_unlock(object);
16404 }
16405
16406 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
16407 /*
16408 * XXX
16409 * We do not hold the VM map exclusively here.
16410 * The "alias" field is not that critical, so it's
16411 * safe to update it here, as long as it is the only
16412 * one that can be modified while holding the VM map
16413 * "shared".
16414 */
16415 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
16416 }
16417 }
16418
16419 vm_map_unlock_read(map);
16420 vm_page_stats_reusable.reuse_pages_success++;
16421 return KERN_SUCCESS;
16422 }
16423
16424
16425 static kern_return_t
16426 vm_map_reusable_pages(
16427 vm_map_t map,
16428 vm_map_offset_t start,
16429 vm_map_offset_t end)
16430 {
16431 vm_map_entry_t entry;
16432 vm_object_t object;
16433 vm_object_offset_t start_offset, end_offset;
16434 vm_map_offset_t pmap_offset;
16435
16436 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16437 /*
16438 * XXX TODO4K
16439 * need to figure out what reusable means for a portion
16440 * of a native page.
16441 */
16442 return KERN_SUCCESS;
16443 }
16444
16445 /*
16446 * The MADV_REUSABLE operation doesn't require any changes to the
16447 * vm_map_entry_t's, so the read lock is sufficient.
16448 */
16449
16450 vm_map_lock_read(map);
16451 assert(map->pmap != kernel_pmap); /* protect alias access */
16452
16453 /*
16454 * The madvise semantics require that the address range be fully
16455 * allocated with no holes. Otherwise, we're required to return
16456 * an error.
16457 */
16458
16459 if (!vm_map_range_check(map, start, end, &entry)) {
16460 vm_map_unlock_read(map);
16461 vm_page_stats_reusable.reusable_pages_failure++;
16462 return KERN_INVALID_ADDRESS;
16463 }
16464
16465 /*
16466 * Examine each vm_map_entry_t in the range.
16467 */
16468 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16469 entry = entry->vme_next) {
16470 int kill_pages = 0;
16471 boolean_t reusable_no_write = FALSE;
16472
16473 /*
16474 * Sanity check on the VM map entry.
16475 */
16476 if (!vm_map_entry_is_reusable(entry)) {
16477 vm_map_unlock_read(map);
16478 vm_page_stats_reusable.reusable_pages_failure++;
16479 return KERN_INVALID_ADDRESS;
16480 }
16481
16482 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit
16483 #if __arm64e__
16484 && !entry->used_for_tpro
16485 #endif
16486 ) {
16487 /* not writable: can't discard contents */
16488 vm_map_unlock_read(map);
16489 vm_page_stats_reusable.reusable_nonwritable++;
16490 vm_page_stats_reusable.reusable_pages_failure++;
16491 return KERN_PROTECTION_FAILURE;
16492 }
16493
16494 /*
16495 * The first time through, the start address could be anywhere
16496 * within the vm_map_entry we found. So adjust the offset to
16497 * correspond.
16498 */
16499 if (entry->vme_start < start) {
16500 start_offset = start - entry->vme_start;
16501 pmap_offset = start;
16502 } else {
16503 start_offset = 0;
16504 pmap_offset = entry->vme_start;
16505 }
16506 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16507 start_offset += VME_OFFSET(entry);
16508 end_offset += VME_OFFSET(entry);
16509
16510 object = VME_OBJECT(entry);
16511 if (object == VM_OBJECT_NULL) {
16512 continue;
16513 }
16514
16515 if (entry->protection & VM_PROT_EXECUTE) {
16516 /*
16517 * Executable mappings might be write-protected by
16518 * hardware, so do not attempt to write to these pages.
16519 */
16520 reusable_no_write = TRUE;
16521 }
16522
16523 vm_object_lock(object);
16524 if (((object->ref_count == 1) ||
16525 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
16526 object->copy == VM_OBJECT_NULL)) &&
16527 object->shadow == VM_OBJECT_NULL &&
16528 /*
16529 * "iokit_acct" entries are billed for their virtual size
16530 * (rather than for their resident pages only), so they
16531 * wouldn't benefit from making pages reusable, and it
16532 * would be hard to keep track of pages that are both
16533 * "iokit_acct" and "reusable" in the pmap stats and
16534 * ledgers.
16535 */
16536 !(entry->iokit_acct ||
16537 (!entry->is_sub_map && !entry->use_pmap))) {
16538 if (object->ref_count != 1) {
16539 vm_page_stats_reusable.reusable_shared++;
16540 }
16541 kill_pages = 1;
16542 } else {
16543 kill_pages = -1;
16544 }
16545 if (kill_pages != -1) {
16546 vm_object_deactivate_pages(object,
16547 start_offset,
16548 end_offset - start_offset,
16549 kill_pages,
16550 TRUE /*reusable_pages*/,
16551 reusable_no_write,
16552 map->pmap,
16553 pmap_offset);
16554 } else {
16555 vm_page_stats_reusable.reusable_pages_shared++;
16556 DTRACE_VM4(vm_map_reusable_pages_shared,
16557 unsigned int, VME_ALIAS(entry),
16558 vm_map_t, map,
16559 vm_map_entry_t, entry,
16560 vm_object_t, object);
16561 }
16562 vm_object_unlock(object);
16563
16564 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16565 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16566 /*
16567 * XXX
16568 * We do not hold the VM map exclusively here.
16569 * The "alias" field is not that critical, so it's
16570 * safe to update it here, as long as it is the only
16571 * one that can be modified while holding the VM map
16572 * "shared".
16573 */
16574 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16575 }
16576 }
16577
16578 vm_map_unlock_read(map);
16579 vm_page_stats_reusable.reusable_pages_success++;
16580 return KERN_SUCCESS;
16581 }
16582
16583
16584 static kern_return_t
16585 vm_map_can_reuse(
16586 vm_map_t map,
16587 vm_map_offset_t start,
16588 vm_map_offset_t end)
16589 {
16590 vm_map_entry_t entry;
16591
16592 /*
16593 	 * The MADV_CAN_REUSE operation doesn't require any changes to the
16594 * vm_map_entry_t's, so the read lock is sufficient.
16595 */
16596
16597 vm_map_lock_read(map);
16598 assert(map->pmap != kernel_pmap); /* protect alias access */
16599
16600 /*
16601 * The madvise semantics require that the address range be fully
16602 * allocated with no holes. Otherwise, we're required to return
16603 * an error.
16604 */
16605
16606 if (!vm_map_range_check(map, start, end, &entry)) {
16607 vm_map_unlock_read(map);
16608 vm_page_stats_reusable.can_reuse_failure++;
16609 return KERN_INVALID_ADDRESS;
16610 }
16611
16612 /*
16613 * Examine each vm_map_entry_t in the range.
16614 */
16615 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16616 entry = entry->vme_next) {
16617 /*
16618 * Sanity check on the VM map entry.
16619 */
16620 if (!vm_map_entry_is_reusable(entry)) {
16621 vm_map_unlock_read(map);
16622 vm_page_stats_reusable.can_reuse_failure++;
16623 return KERN_INVALID_ADDRESS;
16624 }
16625 }
16626
16627 vm_map_unlock_read(map);
16628 vm_page_stats_reusable.can_reuse_success++;
16629 return KERN_SUCCESS;
16630 }
16631
16632
16633 #if MACH_ASSERT
16634 static kern_return_t
16635 vm_map_pageout(
16636 vm_map_t map,
16637 vm_map_offset_t start,
16638 vm_map_offset_t end)
16639 {
16640 vm_map_entry_t entry;
16641
16642 /*
16643 * The MADV_PAGEOUT operation doesn't require any changes to the
16644 * vm_map_entry_t's, so the read lock is sufficient.
16645 */
16646
16647 vm_map_lock_read(map);
16648
16649 /*
16650 * The madvise semantics require that the address range be fully
16651 * allocated with no holes. Otherwise, we're required to return
16652 * an error.
16653 */
16654
16655 if (!vm_map_range_check(map, start, end, &entry)) {
16656 vm_map_unlock_read(map);
16657 return KERN_INVALID_ADDRESS;
16658 }
16659
16660 /*
16661 * Examine each vm_map_entry_t in the range.
16662 */
16663 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16664 entry = entry->vme_next) {
16665 vm_object_t object;
16666
16667 /*
16668 * Sanity check on the VM map entry.
16669 */
16670 if (entry->is_sub_map) {
16671 vm_map_t submap;
16672 vm_map_offset_t submap_start;
16673 vm_map_offset_t submap_end;
16674 vm_map_entry_t submap_entry;
16675
16676 submap = VME_SUBMAP(entry);
16677 submap_start = VME_OFFSET(entry);
16678 submap_end = submap_start + (entry->vme_end -
16679 entry->vme_start);
16680
16681 vm_map_lock_read(submap);
16682
16683 if (!vm_map_range_check(submap,
16684 submap_start,
16685 submap_end,
16686 &submap_entry)) {
16687 vm_map_unlock_read(submap);
16688 vm_map_unlock_read(map);
16689 return KERN_INVALID_ADDRESS;
16690 }
16691
16692 if (submap_entry->is_sub_map) {
16693 vm_map_unlock_read(submap);
16694 continue;
16695 }
16696
16697 object = VME_OBJECT(submap_entry);
16698 if (object == VM_OBJECT_NULL || !object->internal) {
16699 vm_map_unlock_read(submap);
16700 continue;
16701 }
16702
16703 vm_object_pageout(object);
16704
16705 vm_map_unlock_read(submap);
16706 submap = VM_MAP_NULL;
16707 submap_entry = VM_MAP_ENTRY_NULL;
16708 continue;
16709 }
16710
16711 object = VME_OBJECT(entry);
16712 if (object == VM_OBJECT_NULL || !object->internal) {
16713 continue;
16714 }
16715
16716 vm_object_pageout(object);
16717 }
16718
16719 vm_map_unlock_read(map);
16720 return KERN_SUCCESS;
16721 }
16722 #endif /* MACH_ASSERT */
16723
16724
16725 /*
16726 * Routine: vm_map_entry_insert
16727 *
16728 * Description: This routine inserts a new vm_entry in a locked map.
16729 */
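/*
 * "insp_entry" is the entry the new one is linked after.  The map must
 * be held for exclusive (write) access, and "start"/"end" must be page
 * aligned -- map-page aligned when the map uses a larger page size,
 * unless "clear_map_aligned" allows falling back to native page
 * alignment.
 */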
16730 static vm_map_entry_t
16731 vm_map_entry_insert(
16732 vm_map_t map,
16733 vm_map_entry_t insp_entry,
16734 vm_map_offset_t start,
16735 vm_map_offset_t end,
16736 vm_object_t object,
16737 vm_object_offset_t offset,
16738 vm_map_kernel_flags_t vmk_flags,
16739 boolean_t needs_copy,
16740 vm_prot_t cur_protection,
16741 vm_prot_t max_protection,
16742 vm_inherit_t inheritance,
16743 boolean_t clear_map_aligned)
16744 {
16745 vm_map_entry_t new_entry;
16746 boolean_t map_aligned = FALSE;
16747
16748 assert(insp_entry != (vm_map_entry_t)0);
16749 vm_map_lock_assert_exclusive(map);
16750
16751 #if DEVELOPMENT || DEBUG
16752 vm_object_offset_t end_offset = 0;
16753 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16754 #endif /* DEVELOPMENT || DEBUG */
16755
16756 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16757 map_aligned = TRUE;
16758 }
16759 if (clear_map_aligned &&
16760 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16761 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16762 map_aligned = FALSE;
16763 }
16764 if (map_aligned) {
16765 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
16766 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
16767 } else {
16768 assert(page_aligned(start));
16769 assert(page_aligned(end));
16770 }
16771 assert(start < end);
16772
16773 new_entry = vm_map_entry_create(map);
16774
16775 new_entry->vme_start = start;
16776 new_entry->vme_end = end;
16777
16778 if (vmk_flags.vmkf_submap) {
16779 new_entry->vme_atomic = vmk_flags.vmkf_submap_atomic;
16780 VME_SUBMAP_SET(new_entry, (vm_map_t)object);
16781 } else {
16782 VME_OBJECT_SET(new_entry, object, false, 0);
16783 }
16784 VME_OFFSET_SET(new_entry, offset);
16785 VME_ALIAS_SET(new_entry, vmk_flags.vm_tag);
16786
16787 new_entry->map_aligned = map_aligned;
16788 new_entry->needs_copy = needs_copy;
16789 new_entry->inheritance = inheritance;
16790 new_entry->protection = cur_protection;
16791 new_entry->max_protection = max_protection;
16792 /*
16793 * submap: "use_pmap" means "nested".
16794 * default: false.
16795 *
16796 * object: "use_pmap" means "use pmap accounting" for footprint.
16797 * default: true.
16798 */
16799 new_entry->use_pmap = !vmk_flags.vmkf_submap;
16800 new_entry->no_cache = vmk_flags.vmf_no_cache;
16801 new_entry->vme_permanent = vmk_flags.vmf_permanent;
16802 new_entry->translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
16803 new_entry->vme_no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
16804 new_entry->superpage_size = (vmk_flags.vmf_superpage_size != 0);
16805
16806 if (vmk_flags.vmkf_map_jit) {
16807 if (!(map->jit_entry_exists) ||
16808 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16809 new_entry->used_for_jit = TRUE;
16810 map->jit_entry_exists = TRUE;
16811 }
16812 }
16813
16814 /*
16815 * Insert the new entry into the list.
16816 */
16817
16818 vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
16819 map->size += end - start;
16820
16821 /*
16822 * Update the free space hint and the lookup hint.
16823 */
16824
16825 SAVE_HINT_MAP_WRITE(map, new_entry);
16826 return new_entry;
16827 }
16828
16829 /*
16830 * Routine: vm_map_remap_extract
16831 *
16832 * Description: This routine extracts a list of vm_map_entry structures from a map.
16833 */
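/*
 * Summary of the two modes handled below (see the detailed comments in the
 * function body): the extracted entries are linked into map_copy->cpy_hdr.
 * If both *cur_protection and *max_protection come in as VM_PROT_NONE, the
 * routine runs in the legacy vm_remap() mode and reports back the strictest
 * protections found across the range; otherwise it runs in the
 * vm_remap_new() mode and requires each entry to already grant the
 * requested protections.
 */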
16834 static kern_return_t
16835 vm_map_remap_extract(
16836 vm_map_t map,
16837 vm_map_offset_t addr,
16838 vm_map_size_t size,
16839 boolean_t copy,
16840 vm_map_copy_t map_copy,
16841 vm_prot_t *cur_protection, /* IN/OUT */
16842 vm_prot_t *max_protection, /* IN/OUT */
16843 /* What, no behavior? */
16844 vm_inherit_t inheritance,
16845 vm_map_kernel_flags_t vmk_flags)
16846 {
16847 struct vm_map_header *map_header = &map_copy->cpy_hdr;
16848 kern_return_t result;
16849 vm_map_size_t mapped_size;
16850 vm_map_size_t tmp_size;
16851 vm_map_entry_t src_entry; /* result of last map lookup */
16852 vm_map_entry_t new_entry;
16853 vm_object_offset_t offset;
16854 vm_map_offset_t map_address;
16855 vm_map_offset_t src_start; /* start of entry to map */
16856 vm_map_offset_t src_end; /* end of region to be mapped */
16857 vm_object_t object;
16858 vm_map_version_t version;
16859 boolean_t src_needs_copy;
16860 boolean_t new_entry_needs_copy;
16861 vm_map_entry_t saved_src_entry;
16862 boolean_t src_entry_was_wired;
16863 vm_prot_t max_prot_for_prot_copy;
16864 vm_map_offset_t effective_page_mask;
16865 bool pageable, same_map;
16866 boolean_t vm_remap_legacy;
16867 vm_prot_t required_cur_prot, required_max_prot;
16868 vm_object_t new_copy_object; /* vm_object_copy_* result */
16869 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
16870 #if __arm64e__
16871 boolean_t saved_used_for_tpro; /* Saved used_for_tpro. */
16872 #endif
16873
16874 pageable = vmk_flags.vmkf_copy_pageable;
16875 same_map = vmk_flags.vmkf_copy_same_map;
16876
16877 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16878
16879 assert(map != VM_MAP_NULL);
16880 assert(size != 0);
16881 assert(size == vm_map_round_page(size, effective_page_mask));
16882 assert(inheritance == VM_INHERIT_NONE ||
16883 inheritance == VM_INHERIT_COPY ||
16884 inheritance == VM_INHERIT_SHARE);
16885 assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16886 assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16887 assert((*cur_protection & *max_protection) == *cur_protection);
16888
16889 /*
16890 * Compute start and end of region.
16891 */
16892 src_start = vm_map_trunc_page(addr, effective_page_mask);
16893 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16894
16895 /*
16896 * Initialize map_header.
16897 */
16898 map_header->nentries = 0;
16899 map_header->entries_pageable = pageable;
16900 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16901 map_header->page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(map);
16902 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16903 vm_map_store_init(map_header);
16904
16905 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16906 /*
16907 * Special case for vm_map_protect(VM_PROT_COPY):
16908 * we want to set the new mappings' max protection to the
16909 * specified *max_protection...
16910 */
16911 max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16912 /* ... but we want to use the vm_remap() legacy mode */
16913 *max_protection = VM_PROT_NONE;
16914 *cur_protection = VM_PROT_NONE;
16915 } else {
16916 max_prot_for_prot_copy = VM_PROT_NONE;
16917 }
16918
16919 if (*cur_protection == VM_PROT_NONE &&
16920 *max_protection == VM_PROT_NONE) {
16921 /*
16922 * vm_remap() legacy mode:
16923 * Extract all memory regions in the specified range and
16924 * collect the strictest set of protections allowed on the
16925 * entire range, so the caller knows what they can do with
16926 * the remapped range.
16927 * We start with VM_PROT_ALL and we'll remove the protections
16928 * missing from each memory region.
16929 */
16930 vm_remap_legacy = TRUE;
16931 *cur_protection = VM_PROT_ALL;
16932 *max_protection = VM_PROT_ALL;
16933 required_cur_prot = VM_PROT_NONE;
16934 required_max_prot = VM_PROT_NONE;
16935 } else {
16936 /*
16937 * vm_remap_new() mode:
16938 * Extract all memory regions in the specified range and
16939 * ensure that they have at least the protections specified
16940 * by the caller via *cur_protection and *max_protection.
16941 * The resulting mapping should have these protections.
16942 */
16943 vm_remap_legacy = FALSE;
16944 if (copy) {
16945 required_cur_prot = VM_PROT_NONE;
16946 required_max_prot = VM_PROT_READ;
16947 } else {
16948 required_cur_prot = *cur_protection;
16949 required_max_prot = *max_protection;
16950 }
16951 }
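	/*
	 * Worked example of the legacy-mode intersection performed further
	 * below (via "*cur_protection &= ..." on each extracted entry): if
	 * the range covers one entry with protection/max_protection r-x/r-x
	 * and another with rw-/rwx, the caller gets back
	 * *cur_protection == r-- and *max_protection == r-x, i.e. the
	 * strictest common subset across the whole range.
	 */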
16952
16953 map_address = 0;
16954 mapped_size = 0;
16955 result = KERN_SUCCESS;
16956
16957 /*
16958 * The specified source virtual space might correspond to
16959 * multiple map entries, so we need to loop over them.
16960 */
16961 vm_map_lock(map);
16962
16963 if (map->pmap == kernel_pmap) {
16964 map_copy->is_kernel_range = true;
16965 map_copy->orig_range = kmem_addr_get_range(addr, size);
16966 #if CONFIG_MAP_RANGES
16967 } else if (map->uses_user_ranges) {
16968 map_copy->is_user_range = true;
16969 map_copy->orig_range = vm_map_user_range_resolve(map, addr, size, NULL);
16970 #endif /* CONFIG_MAP_RANGES */
16971 }
16972
16973 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16974 /*
16975 * This address space uses sub-pages so the range might
16976 * not be re-mappable in an address space with larger
16977 * pages. Re-assemble any broken-up VM map entries to
16978 * improve our chances of making it work.
16979 */
16980 vm_map_simplify_range(map, src_start, src_end);
16981 }
16982 while (mapped_size != size) {
16983 vm_map_size_t entry_size;
16984
16985 /*
16986 * Find the beginning of the region.
16987 */
16988 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16989 result = KERN_INVALID_ADDRESS;
16990 break;
16991 }
16992
16993 if (src_start < src_entry->vme_start ||
16994 (mapped_size && src_start != src_entry->vme_start)) {
16995 result = KERN_INVALID_ADDRESS;
16996 break;
16997 }
16998
16999 tmp_size = size - mapped_size;
17000 if (src_end > src_entry->vme_end) {
17001 tmp_size -= (src_end - src_entry->vme_end);
17002 }
17003
17004 entry_size = (vm_map_size_t)(src_entry->vme_end -
17005 src_entry->vme_start);
17006
17007 if (src_entry->is_sub_map &&
17008 vmk_flags.vmkf_copy_single_object) {
17009 vm_map_t submap;
17010 vm_map_offset_t submap_start;
17011 vm_map_size_t submap_size;
17012 boolean_t submap_needs_copy;
17013
17014 /*
17015 * No check for "required protection" on "src_entry"
17016 * because the protections that matter are the ones
17017 * on the submap's VM map entry, which will be checked
17018 * during the call to vm_map_remap_extract() below.
17019 */
17020 submap_size = src_entry->vme_end - src_start;
17021 if (submap_size > size) {
17022 submap_size = size;
17023 }
17024 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
17025 submap = VME_SUBMAP(src_entry);
17026 if (copy) {
17027 /*
17028 * The caller wants a copy-on-write re-mapping,
17029 * so let's extract from the submap accordingly.
17030 */
17031 submap_needs_copy = TRUE;
17032 } else if (src_entry->needs_copy) {
17033 /*
17034 * The caller wants a shared re-mapping but the
17035 * submap is mapped with "needs_copy", so its
17036 * contents can't be shared as is. Extract the
17037 * contents of the submap as "copy-on-write".
17038 * The re-mapping won't be shared with the
17039 * original mapping but this is equivalent to
17040 * what happened with the original "remap from
17041 * submap" code.
17042 * The shared region is mapped "needs_copy", for
17043 * example.
17044 */
17045 submap_needs_copy = TRUE;
17046 } else {
17047 /*
17048 * The caller wants a shared re-mapping and
17049 * this mapping can be shared (no "needs_copy"),
17050 * so let's extract from the submap accordingly.
17051 * Kernel submaps are mapped without
17052 * "needs_copy", for example.
17053 */
17054 submap_needs_copy = FALSE;
17055 }
17056 vm_map_reference(submap);
17057 vm_map_unlock(map);
17058 src_entry = NULL;
17059 if (vm_remap_legacy) {
17060 *cur_protection = VM_PROT_NONE;
17061 *max_protection = VM_PROT_NONE;
17062 }
17063
17064 DTRACE_VM7(remap_submap_recurse,
17065 vm_map_t, map,
17066 vm_map_offset_t, addr,
17067 vm_map_size_t, size,
17068 boolean_t, copy,
17069 vm_map_offset_t, submap_start,
17070 vm_map_size_t, submap_size,
17071 boolean_t, submap_needs_copy);
17072
17073 result = vm_map_remap_extract(submap,
17074 submap_start,
17075 submap_size,
17076 submap_needs_copy,
17077 map_copy,
17078 cur_protection,
17079 max_protection,
17080 inheritance,
17081 vmk_flags);
17082 vm_map_deallocate(submap);
17083 return result;
17084 }
17085
17086 if (src_entry->is_sub_map) {
17087 /* protections for submap mapping are irrelevant here */
17088 } else if (((src_entry->protection & required_cur_prot) !=
17089 required_cur_prot) ||
17090 ((src_entry->max_protection & required_max_prot) !=
17091 required_max_prot)) {
17092 if (vmk_flags.vmkf_copy_single_object &&
17093 mapped_size != 0) {
17094 /*
17095 * Single object extraction.
17096 * We can't extract more with the required
17097 * protection but we've extracted some, so
17098 * stop there and declare success.
17099 * The caller should check the size of
17100 * the copy entry we've extracted.
17101 */
17102 result = KERN_SUCCESS;
17103 } else {
17104 /*
17105 * VM range extraction.
17106 * Required protection is not available
17107 * for this part of the range: fail.
17108 */
17109 result = KERN_PROTECTION_FAILURE;
17110 }
17111 break;
17112 }
17113
17114 if (src_entry->is_sub_map) {
17115 vm_map_t submap;
17116 vm_map_offset_t submap_start;
17117 vm_map_size_t submap_size;
17118 vm_map_copy_t submap_copy;
17119 vm_prot_t submap_curprot, submap_maxprot;
17120 boolean_t submap_needs_copy;
17121
17122 /*
17123 * No check for "required protection" on "src_entry"
17124 * because the protections that matter are the ones
17125 * on the submap's VM map entry, which will be checked
17126 * during the call to vm_map_copy_extract() below.
17127 */
17128 object = VM_OBJECT_NULL;
17129 submap_copy = VM_MAP_COPY_NULL;
17130
17131 /* find equivalent range in the submap */
17132 submap = VME_SUBMAP(src_entry);
17133 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
17134 submap_size = tmp_size;
17135 if (copy) {
17136 /*
17137 * The caller wants a copy-on-write re-mapping,
17138 * so let's extract from the submap accordingly.
17139 */
17140 submap_needs_copy = TRUE;
17141 } else if (src_entry->needs_copy) {
17142 /*
17143 * The caller wants a shared re-mapping but the
17144 * submap is mapped with "needs_copy", so its
17145 * contents can't be shared as is. Extract the
17146 * contents of the submap as "copy-on-write".
17147 * The re-mapping won't be shared with the
17148 * original mapping but this is equivalent to
17149 * what happened with the original "remap from
17150 * submap" code.
17151 * The shared region is mapped "needs_copy", for
17152 * example.
17153 */
17154 submap_needs_copy = TRUE;
17155 } else {
17156 /*
17157 * The caller wants a shared re-mapping and
17158 * this mapping can be shared (no "needs_copy"),
17159 * so let's extract from the submap accordingly.
17160 * Kernel submaps are mapped without
17161 * "needs_copy", for example.
17162 */
17163 submap_needs_copy = FALSE;
17164 }
17165 /* extra ref to keep submap alive */
17166 vm_map_reference(submap);
17167
17168 DTRACE_VM7(remap_submap_recurse,
17169 vm_map_t, map,
17170 vm_map_offset_t, addr,
17171 vm_map_size_t, size,
17172 boolean_t, copy,
17173 vm_map_offset_t, submap_start,
17174 vm_map_size_t, submap_size,
17175 boolean_t, submap_needs_copy);
17176
17177 /*
17178 * The map can be safely unlocked since we
17179 * already hold a reference on the submap.
17180 *
17181 * No timestamp since we don't care if the map
17182 * gets modified while we're down in the submap.
17183 * We'll resume the extraction at src_start + tmp_size
17184 * anyway.
17185 */
17186 vm_map_unlock(map);
17187 src_entry = NULL; /* not valid once map is unlocked */
17188
17189 if (vm_remap_legacy) {
17190 submap_curprot = VM_PROT_NONE;
17191 submap_maxprot = VM_PROT_NONE;
17192 if (max_prot_for_prot_copy) {
17193 submap_maxprot = max_prot_for_prot_copy;
17194 }
17195 } else {
17196 assert(!max_prot_for_prot_copy);
17197 submap_curprot = *cur_protection;
17198 submap_maxprot = *max_protection;
17199 }
17200 result = vm_map_copy_extract(submap,
17201 submap_start,
17202 submap_size,
17203 submap_needs_copy,
17204 &submap_copy,
17205 &submap_curprot,
17206 &submap_maxprot,
17207 inheritance,
17208 vmk_flags);
17209
17210 /* release extra ref on submap */
17211 vm_map_deallocate(submap);
17212 submap = VM_MAP_NULL;
17213
17214 if (result != KERN_SUCCESS) {
17215 vm_map_lock(map);
17216 break;
17217 }
17218
17219 /* transfer submap_copy entries to map_header */
17220 while (vm_map_copy_first_entry(submap_copy) !=
17221 vm_map_copy_to_entry(submap_copy)) {
17222 vm_map_entry_t copy_entry;
17223 vm_map_size_t copy_entry_size;
17224
17225 copy_entry = vm_map_copy_first_entry(submap_copy);
17226
17227 /*
17228 * Prevent kernel_object from being exposed to
17229 * user space.
17230 */
17231 if (__improbable(copy_entry->vme_kernel_object)) {
17232 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
17233 proc_selfpid(),
17234 (get_bsdtask_info(current_task())
17235 ? proc_name_address(get_bsdtask_info(current_task()))
17236 : "?"));
17237 DTRACE_VM(extract_kernel_only);
17238 result = KERN_INVALID_RIGHT;
17239 vm_map_copy_discard(submap_copy);
17240 submap_copy = VM_MAP_COPY_NULL;
17241 vm_map_lock(map);
17242 break;
17243 }
17244
17245 vm_map_copy_entry_unlink(submap_copy, copy_entry);
17246 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
17247 copy_entry->vme_start = map_address;
17248 copy_entry->vme_end = map_address + copy_entry_size;
17249 map_address += copy_entry_size;
17250 mapped_size += copy_entry_size;
17251 src_start += copy_entry_size;
17252 assert(src_start <= src_end);
17253 _vm_map_store_entry_link(map_header,
17254 map_header->links.prev,
17255 copy_entry);
17256 }
17257 /* done with submap_copy */
17258 vm_map_copy_discard(submap_copy);
17259
17260 if (vm_remap_legacy) {
17261 *cur_protection &= submap_curprot;
17262 *max_protection &= submap_maxprot;
17263 }
17264
17265 /* re-acquire the map lock and continue to next entry */
17266 vm_map_lock(map);
17267 continue;
17268 } else {
17269 object = VME_OBJECT(src_entry);
17270
17271 /*
17272 * Prevent kernel_object from being exposed to
17273 * user space.
17274 */
17275 if (__improbable(object == kernel_object)) {
17276 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
17277 proc_selfpid(),
17278 (get_bsdtask_info(current_task())
17279 ? proc_name_address(get_bsdtask_info(current_task()))
17280 : "?"));
17281 DTRACE_VM(extract_kernel_only);
17282 result = KERN_INVALID_RIGHT;
17283 break;
17284 }
17285
17286 if (src_entry->iokit_acct) {
17287 /*
17288 * This entry uses "IOKit accounting".
17289 */
17290 } else if (object != VM_OBJECT_NULL &&
17291 (object->purgable != VM_PURGABLE_DENY ||
17292 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
17293 /*
17294 * Purgeable objects have their own accounting:
17295 * no pmap accounting for them.
17296 */
17297 assertf(!src_entry->use_pmap,
17298 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
17299 map,
17300 src_entry,
17301 (uint64_t)src_entry->vme_start,
17302 (uint64_t)src_entry->vme_end,
17303 src_entry->protection,
17304 src_entry->max_protection,
17305 VME_ALIAS(src_entry));
17306 } else {
17307 /*
17308 * Not IOKit or purgeable:
17309 * must be accounted by pmap stats.
17310 */
17311 assertf(src_entry->use_pmap,
17312 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
17313 map,
17314 src_entry,
17315 (uint64_t)src_entry->vme_start,
17316 (uint64_t)src_entry->vme_end,
17317 src_entry->protection,
17318 src_entry->max_protection,
17319 VME_ALIAS(src_entry));
17320 }
17321
17322 if (object == VM_OBJECT_NULL) {
17323 assert(!src_entry->needs_copy);
17324 if (src_entry->max_protection == VM_PROT_NONE) {
17325 assert(src_entry->protection == VM_PROT_NONE);
17326 /*
17327 * No VM object and no permissions:
17328 * this must be a reserved range with
17329 * nothing to share or copy.
17330 * There could also be all sorts of
17331 * pmap shenanigans within that reserved
17332 * range, so let's just copy the map
17333 * entry as is to remap a similar
17334 * reserved range.
17335 */
17336 offset = 0; /* no object => no offset */
17337 goto copy_src_entry;
17338 }
17339 object = vm_object_allocate(entry_size);
17340 VME_OFFSET_SET(src_entry, 0);
17341 VME_OBJECT_SET(src_entry, object, false, 0);
17342 assert(src_entry->use_pmap);
17343 assert(!map->mapped_in_other_pmaps);
17344 } else if (src_entry->wired_count ||
17345 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17346 /*
17347 * A wired memory region should not have
17348 * any pending copy-on-write and needs to
17349 * keep pointing at the VM object that
17350 * contains the wired pages.
17351 * If we're sharing this memory (copy=false),
17352 * we'll share this VM object.
17353 * If we're copying this memory (copy=true),
17354 * we'll call vm_object_copy_slowly() below
17355 * and use the new VM object for the remapping.
17356 *
17357 * Or, we are already using an asymmetric
17358 * copy, and therefore we already have
17359 * the right object.
17360 */
17361 assert(!src_entry->needs_copy);
17362 } else if (src_entry->needs_copy || object->shadowed ||
17363 (object->internal && !object->true_share &&
17364 !src_entry->is_shared &&
17365 object->vo_size > entry_size)) {
17366 VME_OBJECT_SHADOW(src_entry, entry_size,
17367 vm_map_always_shadow(map));
17368 assert(src_entry->use_pmap);
17369
17370 if (!src_entry->needs_copy &&
17371 (src_entry->protection & VM_PROT_WRITE)) {
17372 vm_prot_t prot;
17373
17374 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17375
17376 prot = src_entry->protection & ~VM_PROT_WRITE;
17377
17378 if (override_nx(map,
17379 VME_ALIAS(src_entry))
17380 && prot) {
17381 prot |= VM_PROT_EXECUTE;
17382 }
17383
17384 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17385
17386 if (map->mapped_in_other_pmaps) {
17387 vm_object_pmap_protect(
17388 VME_OBJECT(src_entry),
17389 VME_OFFSET(src_entry),
17390 entry_size,
17391 PMAP_NULL,
17392 PAGE_SIZE,
17393 src_entry->vme_start,
17394 prot);
17395 #if MACH_ASSERT
17396 } else if (__improbable(map->pmap == PMAP_NULL)) {
17397 extern boolean_t vm_tests_in_progress;
17398 assert(vm_tests_in_progress);
17399 /*
17400 * Some VM tests (in vm_tests.c)
17401 * sometimes want to use a VM
17402 * map without a pmap.
17403 * Otherwise, this should never
17404 * happen.
17405 */
17406 #endif /* MACH_ASSERT */
17407 } else {
17408 pmap_protect(vm_map_pmap(map),
17409 src_entry->vme_start,
17410 src_entry->vme_end,
17411 prot);
17412 }
17413 }
17414
17415 object = VME_OBJECT(src_entry);
17416 src_entry->needs_copy = FALSE;
17417 }
17418
17419
17420 vm_object_lock(object);
17421 vm_object_reference_locked(object); /* object ref. for new entry */
17422 assert(!src_entry->needs_copy);
17423 if (object->copy_strategy ==
17424 MEMORY_OBJECT_COPY_SYMMETRIC) {
17425 /*
17426 * If we want to share this object (copy==0),
17427 * it needs to be COPY_DELAY.
17428 * If we want to copy this object (copy==1),
17429 * we can't just set "needs_copy" on our side
17430 * and expect the other side to do the same
17431 * (symmetrically), so we can't let the object
17432 * stay COPY_SYMMETRIC.
17433 * So we always switch from COPY_SYMMETRIC to
17434 * COPY_DELAY.
17435 */
17436 object->copy_strategy =
17437 MEMORY_OBJECT_COPY_DELAY;
17438 object->true_share = TRUE;
17439 }
17440 vm_object_unlock(object);
17441 }
17442
17443 offset = (VME_OFFSET(src_entry) +
17444 (src_start - src_entry->vme_start));
17445
17446 copy_src_entry:
17447 new_entry = _vm_map_entry_create(map_header);
17448 vm_map_entry_copy(map, new_entry, src_entry);
17449 if (new_entry->is_sub_map) {
17450 /* clear address space specifics */
17451 new_entry->use_pmap = FALSE;
17452 } else if (copy) {
17453 /*
17454 * We're dealing with a copy-on-write operation,
17455 * so the resulting mapping should not inherit the
17456 * original mapping's accounting settings.
17457 * "use_pmap" should be reset to its default (TRUE)
17458 * so that the new mapping gets accounted for in
17459 * the task's memory footprint.
17460 */
17461 new_entry->use_pmap = TRUE;
17462 }
17463 /* "iokit_acct" was cleared in vm_map_entry_copy() */
17464 assert(!new_entry->iokit_acct);
17465
17466 new_entry->map_aligned = FALSE;
17467
17468 new_entry->vme_start = map_address;
17469 new_entry->vme_end = map_address + tmp_size;
17470 assert(new_entry->vme_start < new_entry->vme_end);
17471 if (copy && vmk_flags.vmkf_remap_prot_copy) {
17472 /* security: keep "permanent" and "csm_associated" */
17473 new_entry->vme_permanent = src_entry->vme_permanent;
17474 new_entry->csm_associated = src_entry->csm_associated;
17475 /*
17476 * Remapping for vm_map_protect(VM_PROT_COPY)
17477 * to convert a read-only mapping into a
17478 * copy-on-write version of itself but
17479 * with write access:
17480 * keep the original inheritance but let's not
17481 * add VM_PROT_WRITE to the max protection yet
17482 * since we want to do more security checks against
17483 * the target map.
17484 */
17485 new_entry->inheritance = src_entry->inheritance;
17486 new_entry->protection &= max_prot_for_prot_copy;
17487 } else {
17488 new_entry->inheritance = inheritance;
17489 if (!vm_remap_legacy) {
17490 new_entry->protection = *cur_protection;
17491 new_entry->max_protection = *max_protection;
17492 }
17493 }
17494 VME_OFFSET_SET(new_entry, offset);
17495
17496 /*
17497 * The new region has to be copied now if required.
17498 */
17499 RestartCopy:
17500 if (!copy) {
17501 if (src_entry->used_for_jit == TRUE) {
17502 if (same_map) {
17503 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17504 /*
17505 * Cannot allow an entry describing a JIT
17506 * region to be shared across address spaces.
17507 */
17508 result = KERN_INVALID_ARGUMENT;
17509 vm_object_deallocate(object);
17510 vm_map_entry_dispose(new_entry);
17511 new_entry = VM_MAP_ENTRY_NULL;
17512 break;
17513 }
17514 }
17515
17516 src_entry->is_shared = TRUE;
17517 new_entry->is_shared = TRUE;
17518 if (!(new_entry->is_sub_map)) {
17519 new_entry->needs_copy = FALSE;
17520 }
17521 } else if (src_entry->is_sub_map) {
17522 /* make this a COW sub_map if not already */
17523 assert(new_entry->wired_count == 0);
17524 new_entry->needs_copy = TRUE;
17525 object = VM_OBJECT_NULL;
17526 } else if (src_entry->wired_count == 0 &&
17527 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17528 vm_object_copy_quickly(VME_OBJECT(new_entry),
17529 VME_OFFSET(new_entry),
17530 (new_entry->vme_end -
17531 new_entry->vme_start),
17532 &src_needs_copy,
17533 &new_entry_needs_copy)) {
17534 new_entry->needs_copy = new_entry_needs_copy;
17535 new_entry->is_shared = FALSE;
17536 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17537
17538 /*
17539 * Handle copy_on_write semantics.
17540 */
17541 if (src_needs_copy && !src_entry->needs_copy) {
17542 vm_prot_t prot;
17543
17544 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17545
17546 prot = src_entry->protection & ~VM_PROT_WRITE;
17547
17548 if (override_nx(map,
17549 VME_ALIAS(src_entry))
17550 && prot) {
17551 prot |= VM_PROT_EXECUTE;
17552 }
17553
17554 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17555
17556 vm_object_pmap_protect(object,
17557 offset,
17558 entry_size,
17559 ((src_entry->is_shared
17560 || map->mapped_in_other_pmaps) ?
17561 PMAP_NULL : map->pmap),
17562 VM_MAP_PAGE_SIZE(map),
17563 src_entry->vme_start,
17564 prot);
17565
17566 assert(src_entry->wired_count == 0);
17567 src_entry->needs_copy = TRUE;
17568 }
17569 /*
17570 * Throw away the old object reference of the new entry.
17571 */
17572 vm_object_deallocate(object);
17573 } else {
17574 new_entry->is_shared = FALSE;
17575 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17576
17577 src_entry_was_wired = (src_entry->wired_count > 0);
17578 saved_src_entry = src_entry;
17579 src_entry = VM_MAP_ENTRY_NULL;
17580
17581 /*
17582 * The map can be safely unlocked since we
17583 * already hold a reference on the object.
17584 *
17585 * Record the timestamp of the map for later
17586 * verification, and unlock the map.
17587 */
17588 version.main_timestamp = map->timestamp;
17589 vm_map_unlock(map); /* Increments timestamp once! */
17590
17591 /*
17592 * Perform the copy.
17593 */
17594 if (src_entry_was_wired > 0 ||
17595 (debug4k_no_cow_copyin &&
17596 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17597 vm_object_lock(object);
17598 result = vm_object_copy_slowly(
17599 object,
17600 offset,
17601 (new_entry->vme_end -
17602 new_entry->vme_start),
17603 THREAD_UNINT,
17604 &new_copy_object);
17605 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17606 saved_used_for_jit = new_entry->used_for_jit;
17607 #if __arm64e__
17608 saved_used_for_tpro = new_entry->used_for_tpro;
17609 #endif
17610 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
17611 new_entry->used_for_jit = saved_used_for_jit;
17612 #if __arm64e__
17613 new_entry->used_for_tpro = saved_used_for_tpro;
17614 #endif
17615 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17616 new_entry->needs_copy = FALSE;
17617 } else {
17618 vm_object_offset_t new_offset;
17619
17620 new_offset = VME_OFFSET(new_entry);
17621 result = vm_object_copy_strategically(
17622 object,
17623 offset,
17624 (new_entry->vme_end -
17625 new_entry->vme_start),
17626 &new_copy_object,
17627 &new_offset,
17628 &new_entry_needs_copy);
17629 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17630 saved_used_for_jit = new_entry->used_for_jit;
17631 #if __arm64e__
17632 saved_used_for_tpro = new_entry->used_for_tpro;
17633 #endif
17634 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
17635 new_entry->used_for_jit = saved_used_for_jit;
17636 #if __arm64e__
17637 new_entry->used_for_tpro = saved_used_for_tpro;
17638 #endif
17639 if (new_offset != VME_OFFSET(new_entry)) {
17640 VME_OFFSET_SET(new_entry, new_offset);
17641 }
17642
17643 new_entry->needs_copy = new_entry_needs_copy;
17644 }
17645
17646 /*
17647 * Throw away the old object reference of the new entry.
17648 */
17649 vm_object_deallocate(object);
17650
17651 if (result != KERN_SUCCESS &&
17652 result != KERN_MEMORY_RESTART_COPY) {
17653 vm_map_entry_dispose(new_entry);
17654 vm_map_lock(map);
17655 break;
17656 }
17657
17658 /*
17659 * Verify that the map has not substantially
17660 * changed while the copy was being made.
17661 */
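			/*
			 * The vm_map_unlock() above bumped map->timestamp
			 * exactly once, so seeing version.main_timestamp + 1
			 * here means no other writer modified the map while
			 * it was unlocked; any other value forces the entry
			 * to be looked up again on the next loop iteration.
			 */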
17662
17663 vm_map_lock(map);
17664 if (version.main_timestamp + 1 != map->timestamp) {
17665 /*
17666 * Simple version comparison failed.
17667 *
17668 * Retry the lookup and verify that the
17669 * same object/offset are still present.
17670 */
17671 saved_src_entry = VM_MAP_ENTRY_NULL;
17672 vm_object_deallocate(VME_OBJECT(new_entry));
17673 vm_map_entry_dispose(new_entry);
17674 if (result == KERN_MEMORY_RESTART_COPY) {
17675 result = KERN_SUCCESS;
17676 }
17677 continue;
17678 }
17679 /* map hasn't changed: src_entry is still valid */
17680 src_entry = saved_src_entry;
17681 saved_src_entry = VM_MAP_ENTRY_NULL;
17682
17683 if (result == KERN_MEMORY_RESTART_COPY) {
17684 vm_object_reference(object);
17685 goto RestartCopy;
17686 }
17687 }
17688
17689 _vm_map_store_entry_link(map_header,
17690 map_header->links.prev, new_entry);
17691
17692 /* protections for submap mapping are irrelevant here */
17693 if (vm_remap_legacy && !src_entry->is_sub_map) {
17694 *cur_protection &= src_entry->protection;
17695 *max_protection &= src_entry->max_protection;
17696 }
17697
17698 map_address += tmp_size;
17699 mapped_size += tmp_size;
17700 src_start += tmp_size;
17701
17702 if (vmk_flags.vmkf_copy_single_object) {
17703 if (mapped_size != size) {
17704 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n",
17705 map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17706 if (src_entry->vme_next != vm_map_to_entry(map) &&
17707 src_entry->vme_next->vme_object_value ==
17708 src_entry->vme_object_value) {
17709 /* XXX TODO4K */
17710 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17711 }
17712 }
17713 break;
17714 }
17715 } /* end while */
17716
17717 vm_map_unlock(map);
17718 if (result != KERN_SUCCESS) {
17719 /*
17720 * Free all allocated elements.
17721 */
17722 for (src_entry = map_header->links.next;
17723 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17724 src_entry = new_entry) {
17725 new_entry = src_entry->vme_next;
17726 _vm_map_store_entry_unlink(map_header, src_entry, false);
17727 if (src_entry->is_sub_map) {
17728 vm_map_deallocate(VME_SUBMAP(src_entry));
17729 } else {
17730 vm_object_deallocate(VME_OBJECT(src_entry));
17731 }
17732 vm_map_entry_dispose(src_entry);
17733 }
17734 }
17735 return result;
17736 }
17737
17738 bool
17739 vm_map_is_exotic(
17740 vm_map_t map)
17741 {
17742 return VM_MAP_IS_EXOTIC(map);
17743 }
17744
17745 bool
17746 vm_map_is_alien(
17747 vm_map_t map)
17748 {
17749 return VM_MAP_IS_ALIEN(map);
17750 }
17751
17752 #if XNU_TARGET_OS_OSX
17753 void
17754 vm_map_mark_alien(
17755 vm_map_t map)
17756 {
17757 vm_map_lock(map);
17758 map->is_alien = true;
17759 vm_map_unlock(map);
17760 }
17761
17762 void
17763 vm_map_single_jit(
17764 vm_map_t map)
17765 {
17766 vm_map_lock(map);
17767 map->single_jit = true;
17768 vm_map_unlock(map);
17769 }
17770 #endif /* XNU_TARGET_OS_OSX */
17771
17772 /*
17773 * Callers of this function must call vm_map_copy_require on
17774 * previously created vm_map_copy_t or pass a newly created
17775 * one to ensure that it hasn't been forged.
17776 */
17777 static kern_return_t
17778 vm_map_copy_to_physcopy(
17779 vm_map_copy_t copy_map,
17780 vm_map_t target_map)
17781 {
17782 vm_map_size_t size;
17783 vm_map_entry_t entry;
17784 vm_map_entry_t new_entry;
17785 vm_object_t new_object;
17786 unsigned int pmap_flags;
17787 pmap_t new_pmap;
17788 vm_map_t new_map;
17789 vm_map_address_t src_start, src_end, src_cur;
17790 vm_map_address_t dst_start, dst_end, dst_cur;
17791 kern_return_t kr;
17792 void *kbuf;
17793
17794 /*
17795 * Perform the equivalent of vm_allocate() and memcpy().
17796 * Replace the mappings in "copy_map" with the newly allocated mapping.
17797 */
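	/*
	 * Rough outline of how this is done below: a temporary pageable VM
	 * map backed by a fresh pmap is created, "copy_map" is copied out
	 * into it, a new VM object of the rounded size is mapped alongside
	 * it, and the contents are copied one page at a time through a
	 * kernel buffer with copyinmap()/copyoutmap().  The old entries of
	 * "copy_map" are then discarded and replaced with a single entry
	 * backed by the new object.
	 */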
17798 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17799
17800 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17801
17802 /* create a new pmap to map "copy_map" */
17803 pmap_flags = 0;
17804 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17805 #if PMAP_CREATE_FORCE_4K_PAGES
17806 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17807 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17808 pmap_flags |= PMAP_CREATE_64BIT;
17809 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17810 if (new_pmap == NULL) {
17811 return KERN_RESOURCE_SHORTAGE;
17812 }
17813
17814 /* allocate new VM object */
17815 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17816 new_object = vm_object_allocate(size);
17817 assert(new_object);
17818
17819 /* allocate new VM map entry */
17820 new_entry = vm_map_copy_entry_create(copy_map);
17821 assert(new_entry);
17822
17823 /* finish initializing new VM map entry */
17824 new_entry->protection = VM_PROT_DEFAULT;
17825 new_entry->max_protection = VM_PROT_DEFAULT;
17826 new_entry->use_pmap = TRUE;
17827
17828 /* make new VM map entry point to new VM object */
17829 new_entry->vme_start = 0;
17830 new_entry->vme_end = size;
17831 VME_OBJECT_SET(new_entry, new_object, false, 0);
17832 VME_OFFSET_SET(new_entry, 0);
17833
17834 /* create a new pageable VM map to map "copy_map" */
17835 new_map = vm_map_create_options(new_pmap, 0, MACH_VM_MAX_ADDRESS,
17836 VM_MAP_CREATE_PAGEABLE);
17837 assert(new_map);
17838 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17839
17840 /* map "copy_map" in the new VM map */
17841 src_start = 0;
17842 kr = vm_map_copyout_internal(
17843 new_map,
17844 &src_start,
17845 copy_map,
17846 copy_map->size,
17847 FALSE, /* consume_on_success */
17848 VM_PROT_DEFAULT,
17849 VM_PROT_DEFAULT,
17850 VM_INHERIT_DEFAULT);
17851 assert(kr == KERN_SUCCESS);
17852 src_end = src_start + copy_map->size;
17853
17854 /* map "new_object" in the new VM map */
17855 vm_object_reference(new_object);
17856 dst_start = 0;
17857 kr = vm_map_enter(new_map,
17858 &dst_start,
17859 size,
17860 0, /* mask */
17861 VM_MAP_KERNEL_FLAGS_ANYWHERE(.vm_tag = VM_KERN_MEMORY_OSFMK),
17862 new_object,
17863 0, /* offset */
17864 FALSE, /* needs copy */
17865 VM_PROT_DEFAULT,
17866 VM_PROT_DEFAULT,
17867 VM_INHERIT_DEFAULT);
17868 assert(kr == KERN_SUCCESS);
17869 dst_end = dst_start + size;
17870
17871 /* get a kernel buffer */
17872 kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);
17873
17874 /* physically copy "copy_map" mappings to new VM object */
17875 for (src_cur = src_start, dst_cur = dst_start;
17876 src_cur < src_end;
17877 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17878 vm_size_t bytes;
17879
17880 bytes = PAGE_SIZE;
17881 if (src_cur + PAGE_SIZE > src_end) {
17882 /* partial copy for last page */
17883 bytes = src_end - src_cur;
17884 assert(bytes > 0 && bytes < PAGE_SIZE);
17885 /* rest of dst page should be zero-filled */
17886 }
17887 /* get bytes from src mapping */
17888 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17889 if (kr != KERN_SUCCESS) {
17890 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17891 }
17892 /* put bytes in dst mapping */
17893 assert(dst_cur < dst_end);
17894 assert(dst_cur + bytes <= dst_end);
17895 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17896 if (kr != KERN_SUCCESS) {
17897 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17898 }
17899 }
17900
17901 /* free kernel buffer */
17902 kfree_data(kbuf, PAGE_SIZE);
17903
17904 /* destroy new map */
17905 vm_map_destroy(new_map);
17906 new_map = VM_MAP_NULL;
17907
17908 /* dispose of the old map entries in "copy_map" */
17909 while (vm_map_copy_first_entry(copy_map) !=
17910 vm_map_copy_to_entry(copy_map)) {
17911 entry = vm_map_copy_first_entry(copy_map);
17912 vm_map_copy_entry_unlink(copy_map, entry);
17913 if (entry->is_sub_map) {
17914 vm_map_deallocate(VME_SUBMAP(entry));
17915 } else {
17916 vm_object_deallocate(VME_OBJECT(entry));
17917 }
17918 vm_map_copy_entry_dispose(entry);
17919 }
17920
17921 /* change "copy_map"'s page_size to match "target_map" */
17922 copy_map->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
17923 copy_map->offset = 0;
17924 copy_map->size = size;
17925
17926 /* insert new map entry in "copy_map" */
17927 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17928 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17929
17930 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17931 return KERN_SUCCESS;
17932 }
17933
17934 void
17935 vm_map_copy_adjust_get_target_copy_map(
17936 vm_map_copy_t copy_map,
17937 vm_map_copy_t *target_copy_map_p);
17938 void
17939 vm_map_copy_adjust_get_target_copy_map(
17940 vm_map_copy_t copy_map,
17941 vm_map_copy_t *target_copy_map_p)
17942 {
17943 vm_map_copy_t target_copy_map;
17944 vm_map_entry_t entry, target_entry;
17945
17946 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17947 /* the caller already has a "target_copy_map": use it */
17948 return;
17949 }
17950
17951 /* the caller wants us to create a new copy of "copy_map" */
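	/*
	 * The deep copy built below takes its own reference on each entry's
	 * submap or VM object (vm_map_reference()/vm_object_reference()),
	 * so the resulting "target_copy_map" must eventually be discarded
	 * by the caller like any other vm_map_copy_t.
	 */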
17952 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17953 target_copy_map = vm_map_copy_allocate(copy_map->type);
17954 target_copy_map->offset = copy_map->offset;
17955 target_copy_map->size = copy_map->size;
17956 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17957 for (entry = vm_map_copy_first_entry(copy_map);
17958 entry != vm_map_copy_to_entry(copy_map);
17959 entry = entry->vme_next) {
17960 target_entry = vm_map_copy_entry_create(target_copy_map);
17961 vm_map_entry_copy_full(target_entry, entry);
17962 if (target_entry->is_sub_map) {
17963 vm_map_reference(VME_SUBMAP(target_entry));
17964 } else {
17965 vm_object_reference(VME_OBJECT(target_entry));
17966 }
17967 vm_map_copy_entry_link(
17968 target_copy_map,
17969 vm_map_copy_last_entry(target_copy_map),
17970 target_entry);
17971 }
17972 entry = VM_MAP_ENTRY_NULL;
17973 *target_copy_map_p = target_copy_map;
17974 }
17975
17976 /*
17977 * Callers of this function must call vm_map_copy_require on
17978 * previously created vm_map_copy_t or pass a newly created
17979 * one to ensure that it hasn't been forged.
17980 */
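/*
 * Hedged note on the arguments: "trim_start" and "trim_end" are offsets
 * relative to the start of the copy (they are rebased onto the first
 * entry's vme_start below).  Entries overlapping [trim_start, trim_end)
 * are clipped as needed, unlinked and disposed of, and copy_map->size is
 * reduced accordingly; the clipping itself is done at the new page size.
 */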
17981 static void
17982 vm_map_copy_trim(
17983 vm_map_copy_t copy_map,
17984 uint16_t new_page_shift,
17985 vm_map_offset_t trim_start,
17986 vm_map_offset_t trim_end)
17987 {
17988 uint16_t copy_page_shift;
17989 vm_map_entry_t entry, next_entry;
17990
17991 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17992 assert(copy_map->cpy_hdr.nentries > 0);
17993
17994 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17995 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17996
17997 /* use the new page_shift to do the clipping */
17998 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17999 copy_map->cpy_hdr.page_shift = new_page_shift;
18000
18001 for (entry = vm_map_copy_first_entry(copy_map);
18002 entry != vm_map_copy_to_entry(copy_map);
18003 entry = next_entry) {
18004 next_entry = entry->vme_next;
18005 if (entry->vme_end <= trim_start) {
18006 /* entry fully before trim range: skip */
18007 continue;
18008 }
18009 if (entry->vme_start >= trim_end) {
18010 /* entry fully after trim range: done */
18011 break;
18012 }
18013 /* clip entry if needed */
18014 vm_map_copy_clip_start(copy_map, entry, trim_start);
18015 vm_map_copy_clip_end(copy_map, entry, trim_end);
18016 /* dispose of entry */
18017 copy_map->size -= entry->vme_end - entry->vme_start;
18018 vm_map_copy_entry_unlink(copy_map, entry);
18019 if (entry->is_sub_map) {
18020 vm_map_deallocate(VME_SUBMAP(entry));
18021 } else {
18022 vm_object_deallocate(VME_OBJECT(entry));
18023 }
18024 vm_map_copy_entry_dispose(entry);
18025 entry = VM_MAP_ENTRY_NULL;
18026 }
18027
18028 /* restore copy_map's original page_shift */
18029 copy_map->cpy_hdr.page_shift = copy_page_shift;
18030 }
18031
18032 /*
18033 * Make any necessary adjustments to "copy_map" to allow it to be
18034 * mapped into "target_map".
18035 * If no changes were necessary, "target_copy_map" points to the
18036 * untouched "copy_map".
18037 * If changes are necessary, changes will be made to "target_copy_map".
18038 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
18039 * copy the original "copy_map" to it before applying the changes.
18040 * The caller should discard "target_copy_map" if it's not the same as
18041 * the original "copy_map".
18042 */
18043 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
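/*
 * Illustrative scenario (an assumption for exposition, not taken from a
 * specific caller): remapping a 4K-page copy_map into a 16K-page
 * target_map.  The requested [offset, offset+size) window is first trimmed
 * to 16K boundaries ("trimmed_start"), and if the first or last VM object
 * offset is not 16K-aligned the mapping may be grown by up to one target
 * page at either end ("overmap_start"/"overmap_end") so that the result
 * can be entered into the target map.
 */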
18044 kern_return_t
18045 vm_map_copy_adjust_to_target(
18046 vm_map_copy_t src_copy_map,
18047 vm_map_offset_t offset,
18048 vm_map_size_t size,
18049 vm_map_t target_map,
18050 boolean_t copy,
18051 vm_map_copy_t *target_copy_map_p,
18052 vm_map_offset_t *overmap_start_p,
18053 vm_map_offset_t *overmap_end_p,
18054 vm_map_offset_t *trimmed_start_p)
18055 {
18056 vm_map_copy_t copy_map, target_copy_map;
18057 vm_map_size_t target_size;
18058 vm_map_size_t src_copy_map_size;
18059 vm_map_size_t overmap_start, overmap_end;
18060 int misalignments;
18061 vm_map_entry_t entry, target_entry;
18062 vm_map_offset_t addr_adjustment;
18063 vm_map_offset_t new_start, new_end;
18064 int copy_page_mask, target_page_mask;
18065 uint16_t copy_page_shift, target_page_shift;
18066 vm_map_offset_t trimmed_end;
18067
18068 /*
18069 * Assert that the vm_map_copy is coming from the right
18070 * zone and hasn't been forged
18071 */
18072 vm_map_copy_require(src_copy_map);
18073 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
18074
18075 /*
18076 * Start working with "src_copy_map" but we'll switch
18077 * to "target_copy_map" as soon as we start making adjustments.
18078 */
18079 copy_map = src_copy_map;
18080 src_copy_map_size = src_copy_map->size;
18081
18082 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
18083 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
18084 target_page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
18085 target_page_mask = VM_MAP_PAGE_MASK(target_map);
18086
18087 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
18088
18089 target_copy_map = *target_copy_map_p;
18090 if (target_copy_map != VM_MAP_COPY_NULL) {
18091 vm_map_copy_require(target_copy_map);
18092 }
18093
18094 if (offset + size > copy_map->size) {
18095 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
18096 return KERN_INVALID_ARGUMENT;
18097 }
18098
18099 /* trim the end */
18100 trimmed_end = 0;
18101 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
18102 if (new_end < copy_map->size) {
18103 trimmed_end = src_copy_map_size - new_end;
18104 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
18105 /* get "target_copy_map" if needed and adjust it */
18106 vm_map_copy_adjust_get_target_copy_map(copy_map,
18107 &target_copy_map);
18108 copy_map = target_copy_map;
18109 vm_map_copy_trim(target_copy_map, target_page_shift,
18110 new_end, copy_map->size);
18111 }
18112
18113 /* trim the start */
18114 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
18115 if (new_start != 0) {
18116 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
18117 /* get "target_copy_map" if needed and adjust it */
18118 vm_map_copy_adjust_get_target_copy_map(copy_map,
18119 &target_copy_map);
18120 copy_map = target_copy_map;
18121 vm_map_copy_trim(target_copy_map, target_page_shift,
18122 0, new_start);
18123 }
18124 *trimmed_start_p = new_start;
18125
18126 /* target_size starts with what's left after trimming */
18127 target_size = copy_map->size;
18128 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
18129 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
18130 (uint64_t)target_size, (uint64_t)src_copy_map_size,
18131 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
18132
18133 /* check for misalignments but don't adjust yet */
18134 misalignments = 0;
18135 overmap_start = 0;
18136 overmap_end = 0;
18137 if (copy_page_shift < target_page_shift) {
18138 /*
18139 * Remapping from 4K to 16K: check the VM object alignments
18140 * throughout the range.
18141 * If the start and end of the range are mis-aligned, we can
18142 * over-map to re-align, and adjust the "overmap" start/end
18143 * and "target_size" of the range accordingly.
18144 * If there is any mis-alignment within the range:
18145 * if "copy":
18146 * we can do immediate-copy instead of copy-on-write,
18147 * else:
18148 * no way to remap and share; fail.
18149 */
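		/*
		 * Concrete (hypothetical) numbers for the check below: with a
		 * 4K copy_map entry whose VME_OFFSET is 0x3000 and a 16K
		 * target page mask of 0x3fff, 0x3000 & 0x3fff != 0, so the
		 * entry starts mis-aligned.  If it is the first entry and we
		 * are not copying, that counts as "overmap_start"; otherwise
		 * it counts as a misalignment.
		 */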
18150 for (entry = vm_map_copy_first_entry(copy_map);
18151 entry != vm_map_copy_to_entry(copy_map);
18152 entry = entry->vme_next) {
18153 vm_object_offset_t object_offset_start, object_offset_end;
18154
18155 object_offset_start = VME_OFFSET(entry);
18156 object_offset_end = object_offset_start;
18157 object_offset_end += entry->vme_end - entry->vme_start;
18158 if (object_offset_start & target_page_mask) {
18159 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
18160 overmap_start++;
18161 } else {
18162 misalignments++;
18163 }
18164 }
18165 if (object_offset_end & target_page_mask) {
18166 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
18167 overmap_end++;
18168 } else {
18169 misalignments++;
18170 }
18171 }
18172 }
18173 }
18174 entry = VM_MAP_ENTRY_NULL;
18175
18176 /* decide how to deal with misalignments */
18177 assert(overmap_start <= 1);
18178 assert(overmap_end <= 1);
18179 if (!overmap_start && !overmap_end && !misalignments) {
18180 /* copy_map is properly aligned for target_map ... */
18181 if (*trimmed_start_p) {
18182 /* ... but we trimmed it, so still need to adjust */
18183 } else {
18184 /* ... and we didn't trim anything: we're done */
18185 if (target_copy_map == VM_MAP_COPY_NULL) {
18186 target_copy_map = copy_map;
18187 }
18188 *target_copy_map_p = target_copy_map;
18189 *overmap_start_p = 0;
18190 *overmap_end_p = 0;
18191 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
18192 return KERN_SUCCESS;
18193 }
18194 } else if (misalignments && !copy) {
18195 /* can't "share" if misaligned */
18196 DEBUG4K_ADJUST("unsupported sharing\n");
18197 #if MACH_ASSERT
18198 if (debug4k_panic_on_misaligned_sharing) {
18199 panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
18200 }
18201 #endif /* MACH_ASSERT */
18202 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
18203 return KERN_NOT_SUPPORTED;
18204 } else {
18205 /* can't virtual-copy if misaligned (but can physical-copy) */
18206 DEBUG4K_ADJUST("mis-aligned copying\n");
18207 }
18208
18209 /* get a "target_copy_map" if needed and switch to it */
18210 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
18211 copy_map = target_copy_map;
18212
18213 if (misalignments && copy) {
18214 vm_map_size_t target_copy_map_size;
18215
18216 /*
18217 * Can't do copy-on-write with misaligned mappings.
18218 * Replace the mappings with a physical copy of the original
18219 * mappings' contents.
18220 */
18221 target_copy_map_size = target_copy_map->size;
18222 kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
18223 if (kr != KERN_SUCCESS) {
18224 return kr;
18225 }
18226 *target_copy_map_p = target_copy_map;
18227 *overmap_start_p = 0;
18228 *overmap_end_p = target_copy_map->size - target_copy_map_size;
18229 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
18230 return KERN_SUCCESS;
18231 }
18232
18233 /* apply the adjustments */
18234 misalignments = 0;
18235 overmap_start = 0;
18236 overmap_end = 0;
18237 /* remove copy_map->offset, so that everything starts at offset 0 */
18238 addr_adjustment = copy_map->offset;
18239 /* also remove whatever we trimmed from the start */
18240 addr_adjustment += *trimmed_start_p;
18241 for (target_entry = vm_map_copy_first_entry(target_copy_map);
18242 target_entry != vm_map_copy_to_entry(target_copy_map);
18243 target_entry = target_entry->vme_next) {
18244 vm_object_offset_t object_offset_start, object_offset_end;
18245
18246 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
18247 object_offset_start = VME_OFFSET(target_entry);
18248 if (object_offset_start & target_page_mask) {
18249 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
18250 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
18251 /*
18252 * start of 1st entry is mis-aligned:
18253 * re-adjust by over-mapping.
18254 */
18255 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
18256 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
18257 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
18258 } else {
18259 misalignments++;
18260 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
18261 assert(copy);
18262 }
18263 }
18264
18265 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
18266 target_size += overmap_start;
18267 } else {
18268 target_entry->vme_start += overmap_start;
18269 }
18270 target_entry->vme_end += overmap_start;
18271
18272 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
18273 if (object_offset_end & target_page_mask) {
18274 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
18275 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
18276 /*
18277 * end of last entry is mis-aligned: re-adjust by over-mapping.
18278 */
18279 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
18280 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
18281 target_entry->vme_end += overmap_end;
18282 target_size += overmap_end;
18283 } else {
18284 misalignments++;
18285 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
18286 assert(copy);
18287 }
18288 }
18289 target_entry->vme_start -= addr_adjustment;
18290 target_entry->vme_end -= addr_adjustment;
18291 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
18292 }
18293
18294 target_copy_map->size = target_size;
18295 target_copy_map->offset += overmap_start;
18296 target_copy_map->offset -= addr_adjustment;
18297 target_copy_map->cpy_hdr.page_shift = target_page_shift;
18298
18299 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
18300 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
18301 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
18302 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
18303
18304 *target_copy_map_p = target_copy_map;
18305 *overmap_start_p = overmap_start;
18306 *overmap_end_p = overmap_end;
18307
18308 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
18309 return KERN_SUCCESS;
18310 }
18311
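/*
 * Hedged usage sketch (hypothetical caller, not from the original source):
 * a caller that wants to know how much kernel-page-granular address space
 * a user range occupies might do
 *
 *	mach_vm_size_t phys_size;
 *	kr = vm_map_range_physical_size(map, start, size, &phys_size);
 *
 * On a system where "map" uses 4K pages and the kernel uses 16K pages,
 * phys_size can exceed the 4K-rounded size because the range is re-rounded
 * to the kernel map's page size.
 */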
18312 kern_return_t
18313 vm_map_range_physical_size(
18314 vm_map_t map,
18315 vm_map_address_t start,
18316 mach_vm_size_t size,
18317 mach_vm_size_t * phys_size)
18318 {
18319 kern_return_t kr;
18320 vm_map_copy_t copy_map, target_copy_map;
18321 vm_map_offset_t adjusted_start, adjusted_end;
18322 vm_map_size_t adjusted_size;
18323 vm_prot_t cur_prot, max_prot;
18324 vm_map_offset_t overmap_start, overmap_end, trimmed_start, end;
18325 vm_map_kernel_flags_t vmk_flags;
18326
18327 if (size == 0) {
18328 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size);
18329 *phys_size = 0;
18330 return KERN_SUCCESS;
18331 }
18332
18333 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
18334 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
18335 if (__improbable(os_add_overflow(start, size, &end) ||
18336 adjusted_end <= adjusted_start)) {
18337 /* wraparound */
18338 printf("%s:%d(start=0x%llx, size=0x%llx) pgmask 0x%x: wraparound\n", __FUNCTION__, __LINE__, (uint64_t)start, (uint64_t)size, VM_MAP_PAGE_MASK(map));
18339 *phys_size = 0;
18340 return KERN_INVALID_ARGUMENT;
18341 }
18342 if (__improbable(vm_map_range_overflows(map, start, size))) {
18343 *phys_size = 0;
18344 return KERN_INVALID_ADDRESS;
18345 }
18346 assert(adjusted_end > adjusted_start);
18347 adjusted_size = adjusted_end - adjusted_start;
18348 *phys_size = adjusted_size;
18349 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
18350 return KERN_SUCCESS;
18351 }
18352 if (start == 0) {
18353 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
18354 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
18355 if (__improbable(adjusted_end <= adjusted_start)) {
18356 /* wraparound */
18357 printf("%s:%d(start=0x%llx, size=0x%llx) pgmask 0x%x: wraparound\n", __FUNCTION__, __LINE__, (uint64_t)start, (uint64_t)size, PAGE_MASK);
18358 *phys_size = 0;
18359 return KERN_INVALID_ARGUMENT;
18360 }
18361 assert(adjusted_end > adjusted_start);
18362 adjusted_size = adjusted_end - adjusted_start;
18363 *phys_size = adjusted_size;
18364 return KERN_SUCCESS;
18365 }
18366
18367 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
18368 vmk_flags.vmkf_copy_pageable = TRUE;
18369 vmk_flags.vmkf_copy_same_map = TRUE;
18370 assert(adjusted_size != 0);
18371 cur_prot = VM_PROT_NONE; /* legacy mode */
18372 max_prot = VM_PROT_NONE; /* legacy mode */
18373 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
18374 FALSE /* copy */,
18375 &copy_map,
18376 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
18377 vmk_flags);
18378 if (kr != KERN_SUCCESS) {
18379 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
18380 //assert(0);
18381 *phys_size = 0;
18382 return kr;
18383 }
18384 assert(copy_map != VM_MAP_COPY_NULL);
18385 target_copy_map = copy_map;
18386 DEBUG4K_ADJUST("adjusting...\n");
18387 kr = vm_map_copy_adjust_to_target(
18388 copy_map,
18389 start - adjusted_start, /* offset */
18390 size, /* size */
18391 kernel_map,
18392 FALSE, /* copy */
18393 &target_copy_map,
18394 &overmap_start,
18395 &overmap_end,
18396 &trimmed_start);
18397 if (kr == KERN_SUCCESS) {
18398 if (target_copy_map->size != *phys_size) {
18399 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
18400 }
18401 *phys_size = target_copy_map->size;
18402 } else {
18403 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
18404 //assert(0);
18405 *phys_size = 0;
18406 }
18407 vm_map_copy_discard(copy_map);
18408 copy_map = VM_MAP_COPY_NULL;
18409
18410 return kr;
18411 }
18412
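/*
 * Illustrative usage (sketch; variable names are hypothetical):
 *
 *   mach_vm_size_t phys_size;
 *   kr = vm_map_range_physical_size(task_map, 0x1ff0, 0x20, &phys_size);
 *
 * On a map whose page size matches the kernel's, phys_size is simply the
 * map-page-rounded size of the range (0x4000 here for a 16K map).  On a
 * 4K map running under a 16K kernel, the extracted copy is adjusted to
 * kernel_map page boundaries first, so phys_size may grow to reflect the
 * over-mapped (16K-aligned) footprint.
 */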
18413
18414 kern_return_t
18415 memory_entry_check_for_adjustment(
18416 vm_map_t src_map,
18417 ipc_port_t port,
18418 vm_map_offset_t *overmap_start,
18419 vm_map_offset_t *overmap_end)
18420 {
18421 kern_return_t kr = KERN_SUCCESS;
18422 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
18423
18424 assert(port);
18425 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
18426
18427 vm_named_entry_t named_entry;
18428
18429 named_entry = mach_memory_entry_from_port(port);
18430 named_entry_lock(named_entry);
18431 copy_map = named_entry->backing.copy;
18432 target_copy_map = copy_map;
18433
18434 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
18435 vm_map_offset_t trimmed_start;
18436
18437 trimmed_start = 0;
18438 DEBUG4K_ADJUST("adjusting...\n");
18439 kr = vm_map_copy_adjust_to_target(
18440 copy_map,
18441 0, /* offset */
18442 copy_map->size, /* size */
18443 src_map,
18444 FALSE, /* copy */
18445 &target_copy_map,
18446 overmap_start,
18447 overmap_end,
18448 &trimmed_start);
18449 assert(trimmed_start == 0);
18450 }
18451 named_entry_unlock(named_entry);
18452
18453 return kr;
18454 }
18455
18456
18457 /*
18458 * Routine: vm_remap
18459 *
18460 * Map portion of a task's address space.
18461 * Mapped region must not overlap more than
18462 * one vm memory object. Protections and
18463 * inheritance attributes remain the same
18464 * as in the original task and are out parameters.
18465 * Source and Target task can be identical.
18466 * Other attributes are identical as for vm_map().
18467 */
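/*
 * Illustrative call (sketch; names and flag values are hypothetical):
 *
 *   vm_map_address_t target_addr = 0;
 *   vm_prot_t cur_prot = VM_PROT_NONE, max_prot = VM_PROT_NONE;
 *
 *   kr = vm_map_remap(target_map, &target_addr, size, 0,
 *           vmk_flags,                    (e.g. an "anywhere" allocation)
 *           src_map, src_addr,
 *           FALSE,                        (share with the source, don't copy)
 *           &cur_prot, &max_prot,         (IN/OUT: effective protections)
 *           VM_INHERIT_DEFAULT);
 */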
18468 kern_return_t
18469 vm_map_remap(
18470 vm_map_t target_map,
18471 vm_map_address_t *address,
18472 vm_map_size_t size,
18473 vm_map_offset_t mask,
18474 vm_map_kernel_flags_t vmk_flags,
18475 vm_map_t src_map,
18476 vm_map_offset_t memory_address,
18477 boolean_t copy,
18478 vm_prot_t *cur_protection, /* IN/OUT */
18479 vm_prot_t *max_protection, /* IN/OUT */
18480 vm_inherit_t inheritance)
18481 {
18482 kern_return_t result;
18483 vm_map_entry_t entry;
18484 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
18485 vm_map_entry_t new_entry;
18486 vm_map_copy_t copy_map;
18487 vm_map_offset_t offset_in_mapping;
18488 vm_map_size_t target_size = 0;
18489 vm_map_size_t src_page_mask, target_page_mask;
18490 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
18491 vm_map_offset_t initial_memory_address;
18492 vm_map_size_t initial_size;
18493 VM_MAP_ZAP_DECLARE(zap_list);
18494
18495 if (target_map == VM_MAP_NULL) {
18496 return KERN_INVALID_ARGUMENT;
18497 }
18498
18499 if (__improbable(vm_map_range_overflows(src_map, memory_address, size))) {
18500 return KERN_INVALID_ARGUMENT;
18501 }
18502
18503 initial_memory_address = memory_address;
18504 initial_size = size;
18505 src_page_mask = VM_MAP_PAGE_MASK(src_map);
18506 target_page_mask = VM_MAP_PAGE_MASK(target_map);
18507
18508 switch (inheritance) {
18509 case VM_INHERIT_NONE:
18510 case VM_INHERIT_COPY:
18511 case VM_INHERIT_SHARE:
18512 if (size != 0 && src_map != VM_MAP_NULL) {
18513 break;
18514 }
18515 OS_FALLTHROUGH;
18516 default:
18517 return KERN_INVALID_ARGUMENT;
18518 }
18519
18520 if (src_page_mask != target_page_mask) {
18521 if (copy) {
18522 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18523 } else {
18524 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18525 }
18526 }
18527
18528 /*
18529 * If the user is requesting that we return the address of the
18530 * first byte of the data (rather than the base of the page),
18531 * then we use different rounding semantics: specifically,
18532 * we assume that (memory_address, size) describes a region
18533 * all of whose pages we must cover, rather than a base to be truncated
18534 * down and a size to be added to that base. So we figure out
18535 * the highest page that the requested region includes and make
18536 * sure that the size will cover it.
18537 *
18538 * The key example we're worried about is of the form:
18539 *
18540 * memory_address = 0x1ff0, size = 0x20
18541 *
18542 * With the old semantics, we round down the memory_address to 0x1000
18543 * and round up the size to 0x1000, resulting in our covering *only*
18544 * page 0x1000. With the new semantics, we'd realize that the region covers
18545 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
18546 * 0x1000 and page 0x2000 in the region we remap.
18547 */
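/*
 * Worked numbers for the example above, assuming 4K source pages
 * (src_page_mask = 0xfff) and vmf_return_data_addr set:
 *   range_start       = trunc(0x1ff0, 0xfff)        = 0x1000
 *   range_end         = round(0x1ff0 + 0x20, 0xfff) = 0x3000
 *   size              = range_end - range_start     = 0x2000
 *   offset_in_mapping = 0x1ff0 - 0x1000             = 0x0ff0
 * so both pages are covered and *address can later be nudged back to the
 * first byte of the caller's data.
 */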
18548 if (vmk_flags.vmf_return_data_addr) {
18549 vm_map_offset_t range_start, range_end;
18550
18551 range_start = vm_map_trunc_page(memory_address, src_page_mask);
18552 range_end = vm_map_round_page(memory_address + size, src_page_mask);
18553 memory_address = range_start;
18554 size = range_end - range_start;
18555 offset_in_mapping = initial_memory_address - memory_address;
18556 } else {
18557 /*
18558 * IMPORTANT:
18559 * This legacy code path is broken: for the range mentioned
18560 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18561 * two 4k pages, it yields [ memory_address = 0x1000,
18562 * size = 0x1000 ], which covers only the first 4k page.
18563 * BUT some code unfortunately depends on this bug, so we
18564 * can't fix it without breaking something.
18565 * New code should get automatically opted in the new
18566 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
18567 */
18568 offset_in_mapping = 0;
18569 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18570 size = vm_map_round_page(size, src_page_mask);
18571 initial_memory_address = memory_address;
18572 initial_size = size;
18573 }
18574
18575
18576 if (size == 0) {
18577 return KERN_INVALID_ARGUMENT;
18578 }
18579
18580 if (vmk_flags.vmf_resilient_media) {
18581 /* must be copy-on-write to be "media resilient" */
18582 if (!copy) {
18583 return KERN_INVALID_ARGUMENT;
18584 }
18585 }
18586
18587 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18588 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18589
18590 assert(size != 0);
18591 result = vm_map_copy_extract(src_map,
18592 memory_address,
18593 size,
18594 copy, &copy_map,
18595 cur_protection, /* IN/OUT */
18596 max_protection, /* IN/OUT */
18597 inheritance,
18598 vmk_flags);
18599 if (result != KERN_SUCCESS) {
18600 return result;
18601 }
18602 assert(copy_map != VM_MAP_COPY_NULL);
18603
18604 /*
18605 * Handle the policy for vm map ranges
18606 *
18607 * If the maps differ, the target_map policy applies like for vm_map()
18608 * For same mapping remaps, we preserve the range.
18609 */
18610 if (vmk_flags.vmkf_copy_same_map) {
18611 vmk_flags.vmkf_range_id = copy_map->orig_range;
18612 } else {
18613 vm_map_kernel_flags_update_range_id(&vmk_flags, target_map);
18614 }
18615
18616 overmap_start = 0;
18617 overmap_end = 0;
18618 trimmed_start = 0;
18619 target_size = size;
18620 if (src_page_mask != target_page_mask) {
18621 vm_map_copy_t target_copy_map;
18622
18623 target_copy_map = copy_map; /* can modify "copy_map" itself */
18624 DEBUG4K_ADJUST("adjusting...\n");
18625 result = vm_map_copy_adjust_to_target(
18626 copy_map,
18627 offset_in_mapping, /* offset */
18628 initial_size,
18629 target_map,
18630 copy,
18631 &target_copy_map,
18632 &overmap_start,
18633 &overmap_end,
18634 &trimmed_start);
18635 if (result != KERN_SUCCESS) {
18636 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18637 vm_map_copy_discard(copy_map);
18638 return result;
18639 }
18640 if (trimmed_start == 0) {
18641 /* nothing trimmed: no adjustment needed */
18642 } else if (trimmed_start >= offset_in_mapping) {
18643 /* trimmed more than offset_in_mapping: nothing left */
18644 assert(overmap_start == 0);
18645 assert(overmap_end == 0);
18646 offset_in_mapping = 0;
18647 } else {
18648 /* trimmed some of offset_in_mapping: adjust */
18649 assert(overmap_start == 0);
18650 assert(overmap_end == 0);
18651 offset_in_mapping -= trimmed_start;
18652 }
18653 offset_in_mapping += overmap_start;
18654 target_size = target_copy_map->size;
18655 }
18656
18657 /*
18658 * Allocate/check a range of free virtual address
18659 * space for the target
18660 */
18661 *address = vm_map_trunc_page(*address, target_page_mask);
18662 vm_map_lock(target_map);
18663 target_size = vm_map_round_page(target_size, target_page_mask);
18664 result = vm_map_remap_range_allocate(target_map, address,
18665 target_size, mask, vmk_flags,
18666 &insp_entry, &zap_list);
18667
18668 for (entry = vm_map_copy_first_entry(copy_map);
18669 entry != vm_map_copy_to_entry(copy_map);
18670 entry = new_entry) {
18671 new_entry = entry->vme_next;
18672 vm_map_copy_entry_unlink(copy_map, entry);
18673 if (result == KERN_SUCCESS) {
18674 if (vmk_flags.vmkf_remap_prot_copy) {
18675 /*
18676 * This vm_map_remap() is for a
18677 * vm_protect(VM_PROT_COPY), so the caller
18678 * expects to be allowed to add write access
18679 * to this new mapping. This is done by
18680 * adding VM_PROT_WRITE to each entry's
18681 * max_protection... unless some security
18682 * settings disallow it.
18683 */
18684 bool allow_write = false;
18685 if (entry->vme_permanent) {
18686 /* immutable mapping... */
18687 if ((entry->max_protection & VM_PROT_EXECUTE) &&
18688 developer_mode_state()) {
18689 /*
18690 * ... but executable and
18691 * possibly being debugged,
18692 * so let's allow it to become
18693 * writable, for breakpoints
18694 * and dtrace probes, for
18695 * example.
18696 */
18697 allow_write = true;
18698 } else {
18699 printf("%d[%s] vm_remap(0x%llx,0x%llx) VM_PROT_COPY denied on permanent mapping prot 0x%x/0x%x developer %d\n",
18700 proc_selfpid(),
18701 (get_bsdtask_info(current_task())
18702 ? proc_name_address(get_bsdtask_info(current_task()))
18703 : "?"),
18704 (uint64_t)memory_address,
18705 (uint64_t)size,
18706 entry->protection,
18707 entry->max_protection,
18708 developer_mode_state());
18709 DTRACE_VM6(vm_map_delete_permanent_deny_protcopy,
18710 vm_map_entry_t, entry,
18711 vm_map_offset_t, entry->vme_start,
18712 vm_map_offset_t, entry->vme_end,
18713 vm_prot_t, entry->protection,
18714 vm_prot_t, entry->max_protection,
18715 int, VME_ALIAS(entry));
18716 }
18717 } else {
18718 allow_write = true;
18719 }
18720
18721 /*
18722 * VM_PROT_COPY: allow this mapping to become
18723 * writable, unless it was "permanent".
18724 */
18725 if (allow_write) {
18726 entry->max_protection |= VM_PROT_WRITE;
18727 }
18728 }
18729 if (vmk_flags.vmf_resilient_codesign) {
18730 /* no codesigning -> read-only access */
18731 entry->max_protection = VM_PROT_READ;
18732 entry->protection = VM_PROT_READ;
18733 entry->vme_resilient_codesign = TRUE;
18734 }
18735 entry->vme_start += *address;
18736 entry->vme_end += *address;
18737 assert(!entry->map_aligned);
18738 if (vmk_flags.vmf_resilient_media &&
18739 !entry->is_sub_map &&
18740 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18741 VME_OBJECT(entry)->internal)) {
18742 entry->vme_resilient_media = TRUE;
18743 }
18744 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18745 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18746 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18747 vm_map_store_entry_link(target_map, insp_entry, entry,
18748 vmk_flags);
18749 insp_entry = entry;
18750 } else {
18751 if (!entry->is_sub_map) {
18752 vm_object_deallocate(VME_OBJECT(entry));
18753 } else {
18754 vm_map_deallocate(VME_SUBMAP(entry));
18755 }
18756 vm_map_copy_entry_dispose(entry);
18757 }
18758 }
18759
18760 if (vmk_flags.vmf_resilient_codesign) {
18761 *cur_protection = VM_PROT_READ;
18762 *max_protection = VM_PROT_READ;
18763 }
18764
18765 if (result == KERN_SUCCESS) {
18766 target_map->size += target_size;
18767 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18768 }
18769 vm_map_unlock(target_map);
18770
18771 vm_map_zap_dispose(&zap_list);
18772
18773 if (result == KERN_SUCCESS && target_map->wiring_required) {
18774 result = vm_map_wire_kernel(target_map, *address,
18775 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18776 TRUE);
18777 }
18778
18779 /*
18780 * If requested, return the address of the data pointed to by the
18781 * request, rather than the base of the resulting page.
18782 */
18783 if (vmk_flags.vmf_return_data_addr) {
18784 *address += offset_in_mapping;
18785 }
18786
18787 if (src_page_mask != target_page_mask) {
18788 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18789 }
18790 vm_map_copy_discard(copy_map);
18791 copy_map = VM_MAP_COPY_NULL;
18792
18793 return result;
18794 }
18795
18796 /*
18797 * Routine: vm_map_remap_range_allocate
18798 *
18799 * Description:
18800 * Allocate a range in the specified virtual address map.
18801 * returns the address and the map entry just before the allocated
18802 * range
18803 *
18804 * Map must be locked.
18805 */
18806
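/*
 * Illustrative call, matching how vm_map_remap() uses it above (sketch):
 *
 *   vm_map_lock(target_map);
 *   kr = vm_map_remap_range_allocate(target_map, &addr, target_size,
 *           mask, vmk_flags, &insp_entry, &zap_list);
 *
 * With vmf_fixed clear, a free range is located for the caller; with
 * vmf_fixed set, the supplied address is validated (mask, bounds, ranges)
 * and, if vmf_overwrite is set, any existing mapping is deleted into the
 * zap_list so the remove-and-remap combination stays atomic under the
 * map lock.
 */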
18807 static kern_return_t
18808 vm_map_remap_range_allocate(
18809 vm_map_t map,
18810 vm_map_address_t *address, /* IN/OUT */
18811 vm_map_size_t size,
18812 vm_map_offset_t mask,
18813 vm_map_kernel_flags_t vmk_flags,
18814 vm_map_entry_t *map_entry, /* OUT */
18815 vm_map_zap_t zap_list)
18816 {
18817 vm_map_entry_t entry;
18818 vm_map_offset_t start;
18819 kern_return_t kr;
18820
18821 start = *address;
18822
18823 if (!vmk_flags.vmf_fixed) {
18824 kr = vm_map_locate_space(map, size, mask, vmk_flags,
18825 &start, &entry);
18826 if (kr != KERN_SUCCESS) {
18827 return kr;
18828 }
18829 *address = start;
18830 } else {
18831 vm_map_offset_t effective_min_offset, effective_max_offset;
18832 vm_map_entry_t temp_entry;
18833 vm_map_offset_t end;
18834
18835 effective_min_offset = map->min_offset;
18836 effective_max_offset = map->max_offset;
18837
18838 /*
18839 * Verify that:
18840 * the address doesn't itself violate
18841 * the mask requirement.
18842 */
18843
18844 if ((start & mask) != 0) {
18845 return KERN_NO_SPACE;
18846 }
18847
18848 #if CONFIG_MAP_RANGES
18849 if (map->uses_user_ranges) {
18850 struct mach_vm_range r;
18851
18852 vm_map_user_range_resolve(map, start, 1, &r);
18853 if (r.max_address == 0) {
18854 return KERN_INVALID_ADDRESS;
18855 }
18856
18857 effective_min_offset = r.min_address;
18858 effective_max_offset = r.max_address;
18859 }
18860 #endif /* CONFIG_MAP_RANGES */
18861 if (map == kernel_map) {
18862 mach_vm_range_t r = kmem_validate_range_for_overwrite(start, size);
18863 effective_min_offset = r->min_address;
18864 effective_max_offset = r->max_address;
18865 }
18866
18867 /*
18868 * ... the address is within bounds
18869 */
18870
18871 end = start + size;
18872
18873 if ((start < effective_min_offset) ||
18874 (end > effective_max_offset) ||
18875 (start >= end)) {
18876 return KERN_INVALID_ADDRESS;
18877 }
18878
18879 /*
18880 * If we're asked to overwrite whatever was mapped in that
18881 * range, first deallocate that range.
18882 */
18883 if (vmk_flags.vmf_overwrite) {
18884 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_MAP_ALIGN;
18885
18886 /*
18887 * We use a "zap_list" to avoid having to unlock
18888 * the "map" in vm_map_delete(), which would compromise
18889 * the atomicity of the "deallocate" and then "remap"
18890 * combination.
18891 */
18892 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
18893
18894 if (vmk_flags.vmkf_overwrite_immutable) {
18895 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18896 }
18897 if (vmk_flags.vmkf_remap_prot_copy) {
18898 remove_flags |= VM_MAP_REMOVE_IMMUTABLE_CODE;
18899 }
18900 kr = vm_map_delete(map, start, end, remove_flags,
18901 KMEM_GUARD_NONE, zap_list).kmr_return;
18902 if (kr != KERN_SUCCESS) {
18903 /* XXX FBDP restore zap_list? */
18904 return kr;
18905 }
18906 }
18907
18908 /*
18909 * ... the starting address isn't allocated
18910 */
18911
18912 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18913 return KERN_NO_SPACE;
18914 }
18915
18916 entry = temp_entry;
18917
18918 /*
18919 * ... the next region doesn't overlap the
18920 * end point.
18921 */
18922
18923 if ((entry->vme_next != vm_map_to_entry(map)) &&
18924 (entry->vme_next->vme_start < end)) {
18925 return KERN_NO_SPACE;
18926 }
18927 }
18928 *map_entry = entry;
18929 return KERN_SUCCESS;
18930 }
18931
18932 /*
18933 * vm_map_switch:
18934 *
18935 * Set the address map for the current thread to the specified map
18936 */
18937
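/*
 * Typical usage pattern (as in vm_map_write_user() / vm_map_read_user()
 * below): switch to the target map around a copyin()/copyout(), then
 * restore the previous map.
 *
 *   oldmap = vm_map_switch(map);
 *   ... copyout()/copyin() against "map" ...
 *   vm_map_switch(oldmap);
 */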
18938 vm_map_t
18939 vm_map_switch(
18940 vm_map_t map)
18941 {
18942 thread_t thread = current_thread();
18943 vm_map_t oldmap = thread->map;
18944
18945
18946 /*
18947 * Deactivate the current map and activate the requested map
18948 */
18949 mp_disable_preemption();
18950 PMAP_SWITCH_USER(thread, map, cpu_number());
18951 mp_enable_preemption();
18952 return oldmap;
18953 }
18954
18955
18956 /*
18957 * Routine: vm_map_write_user
18958 *
18959 * Description:
18960 * Copy out data from a kernel space into space in the
18961 * destination map. The space must already exist in the
18962 * destination map.
18963 * NOTE: This routine should only be called by threads
18964 * which can block on a page fault, i.e. kernel-mode user
18965 * threads.
18966 *
18967 */
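/*
 * Illustrative usage (sketch; buffer and address names are hypothetical):
 *
 *   struct some_record kbuf;
 *   ... fill kbuf ...
 *   kr = vm_map_write_user(task_map, &kbuf, user_dst_addr, sizeof(kbuf));
 */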
18968 kern_return_t
18969 vm_map_write_user(
18970 vm_map_t map,
18971 void *src_p,
18972 vm_map_address_t dst_addr,
18973 vm_size_t size)
18974 {
18975 kern_return_t kr = KERN_SUCCESS;
18976
18977 if (__improbable(vm_map_range_overflows(map, dst_addr, size))) {
18978 return KERN_INVALID_ADDRESS;
18979 }
18980
18981 if (current_map() == map) {
18982 if (copyout(src_p, dst_addr, size)) {
18983 kr = KERN_INVALID_ADDRESS;
18984 }
18985 } else {
18986 vm_map_t oldmap;
18987
18988 /* take on the identity of the target map while doing */
18989 /* the transfer */
18990
18991 vm_map_reference(map);
18992 oldmap = vm_map_switch(map);
18993 if (copyout(src_p, dst_addr, size)) {
18994 kr = KERN_INVALID_ADDRESS;
18995 }
18996 vm_map_switch(oldmap);
18997 vm_map_deallocate(map);
18998 }
18999 return kr;
19000 }
19001
19002 /*
19003 * Routine: vm_map_read_user
19004 *
19005 * Description:
19006 * Copy in data from a user space source map into the
19007 * kernel map. The space must already exist in the
19008 * kernel map.
19009 * NOTE: This routine should only be called by threads
19010 * which can block on a page fault, i.e. kernel-mode user
19011 * threads.
19012 *
19013 */
19014 kern_return_t
19015 vm_map_read_user(
19016 vm_map_t map,
19017 vm_map_address_t src_addr,
19018 void *dst_p,
19019 vm_size_t size)
19020 {
19021 kern_return_t kr = KERN_SUCCESS;
19022
19023 if (__improbable(vm_map_range_overflows(map, src_addr, size))) {
19024 return KERN_INVALID_ADDRESS;
19025 }
19026
19027 if (current_map() == map) {
19028 if (copyin(src_addr, dst_p, size)) {
19029 kr = KERN_INVALID_ADDRESS;
19030 }
19031 } else {
19032 vm_map_t oldmap;
19033
19034 /* take on the identity of the target map while doing */
19035 /* the transfer */
19036
19037 vm_map_reference(map);
19038 oldmap = vm_map_switch(map);
19039 if (copyin(src_addr, dst_p, size)) {
19040 kr = KERN_INVALID_ADDRESS;
19041 }
19042 vm_map_switch(oldmap);
19043 vm_map_deallocate(map);
19044 }
19045 return kr;
19046 }
19047
19048
19049 /*
19050 * vm_map_check_protection:
19051 *
19052 * Assert that the target map allows the specified
19053 * privilege on the entire address region given.
19054 * The entire region must be allocated.
19055 */
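/*
 * Illustrative usage (sketch): reject a request when the whole range is
 * not at least readable and writable.
 *
 *   if (!vm_map_check_protection(map, start, start + size,
 *           VM_PROT_READ | VM_PROT_WRITE)) {
 *       ... fail the request ...
 *   }
 */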
19056 boolean_t
19057 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
19058 vm_map_offset_t end, vm_prot_t protection)
19059 {
19060 vm_map_entry_t entry;
19061 vm_map_entry_t tmp_entry;
19062
19063 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
19064 return FALSE;
19065 }
19066
19067 vm_map_lock(map);
19068
19069 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
19070 vm_map_unlock(map);
19071 return FALSE;
19072 }
19073
19074 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
19075 vm_map_unlock(map);
19076 return FALSE;
19077 }
19078
19079 entry = tmp_entry;
19080
19081 while (start < end) {
19082 if (entry == vm_map_to_entry(map)) {
19083 vm_map_unlock(map);
19084 return FALSE;
19085 }
19086
19087 /*
19088 * No holes allowed!
19089 */
19090
19091 if (start < entry->vme_start) {
19092 vm_map_unlock(map);
19093 return FALSE;
19094 }
19095
19096 /*
19097 * Check protection associated with entry.
19098 */
19099
19100 if ((entry->protection & protection) != protection) {
19101 vm_map_unlock(map);
19102 return FALSE;
19103 }
19104
19105 /* go to next entry */
19106
19107 start = entry->vme_end;
19108 entry = entry->vme_next;
19109 }
19110 vm_map_unlock(map);
19111 return TRUE;
19112 }
19113
19114 kern_return_t
19115 vm_map_purgable_control(
19116 vm_map_t map,
19117 vm_map_offset_t address,
19118 vm_purgable_t control,
19119 int *state)
19120 {
19121 vm_map_entry_t entry;
19122 vm_object_t object;
19123 kern_return_t kr;
19124 boolean_t was_nonvolatile;
19125
19126 /*
19127 * Vet all the input parameters and current type and state of the
19128 * underlying object. Return with an error if anything is amiss.
19129 */
19130 if (map == VM_MAP_NULL) {
19131 return KERN_INVALID_ARGUMENT;
19132 }
19133
19134 if (control != VM_PURGABLE_SET_STATE &&
19135 control != VM_PURGABLE_GET_STATE &&
19136 control != VM_PURGABLE_PURGE_ALL &&
19137 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
19138 return KERN_INVALID_ARGUMENT;
19139 }
19140
19141 if (control == VM_PURGABLE_PURGE_ALL) {
19142 vm_purgeable_object_purge_all();
19143 return KERN_SUCCESS;
19144 }
19145
19146 if ((control == VM_PURGABLE_SET_STATE ||
19147 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
19148 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
19149 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
19150 return KERN_INVALID_ARGUMENT;
19151 }
19152
19153 vm_map_lock_read(map);
19154
19155 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
19156 /*
19157 * Must pass a valid non-submap address.
19158 */
19159 vm_map_unlock_read(map);
19160 return KERN_INVALID_ADDRESS;
19161 }
19162
19163 if ((entry->protection & VM_PROT_WRITE) == 0 &&
19164 control != VM_PURGABLE_GET_STATE) {
19165 /*
19166 * Can't apply purgable controls to something you can't write.
19167 */
19168 vm_map_unlock_read(map);
19169 return KERN_PROTECTION_FAILURE;
19170 }
19171
19172 object = VME_OBJECT(entry);
19173 if (object == VM_OBJECT_NULL ||
19174 object->purgable == VM_PURGABLE_DENY) {
19175 /*
19176 * Object must already be present and be purgeable.
19177 */
19178 vm_map_unlock_read(map);
19179 return KERN_INVALID_ARGUMENT;
19180 }
19181
19182 vm_object_lock(object);
19183
19184 #if 00
19185 if (VME_OFFSET(entry) != 0 ||
19186 entry->vme_end - entry->vme_start != object->vo_size) {
19187 /*
19188 * Can only apply purgable controls to the whole (existing)
19189 * object at once.
19190 */
19191 vm_map_unlock_read(map);
19192 vm_object_unlock(object);
19193 return KERN_INVALID_ARGUMENT;
19194 }
19195 #endif
19196
19197 assert(!entry->is_sub_map);
19198 assert(!entry->use_pmap); /* purgeable has its own accounting */
19199
19200 vm_map_unlock_read(map);
19201
19202 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
19203
19204 kr = vm_object_purgable_control(object, control, state);
19205
19206 if (was_nonvolatile &&
19207 object->purgable != VM_PURGABLE_NONVOLATILE &&
19208 map->pmap == kernel_pmap) {
19209 #if DEBUG
19210 object->vo_purgeable_volatilizer = kernel_task;
19211 #endif /* DEBUG */
19212 }
19213
19214 vm_object_unlock(object);
19215
19216 return kr;
19217 }
19218
19219 void
19220 vm_map_footprint_query_page_info(
19221 vm_map_t map,
19222 vm_map_entry_t map_entry,
19223 vm_map_offset_t curr_s_offset,
19224 int *disposition_p)
19225 {
19226 int pmap_disp;
19227 vm_object_t object = VM_OBJECT_NULL;
19228 int disposition;
19229 int effective_page_size;
19230
19231 vm_map_lock_assert_held(map);
19232 assert(!map->has_corpse_footprint);
19233 assert(curr_s_offset >= map_entry->vme_start);
19234 assert(curr_s_offset < map_entry->vme_end);
19235
19236 if (map_entry->is_sub_map) {
19237 if (!map_entry->use_pmap) {
19238 /* nested pmap: no footprint */
19239 *disposition_p = 0;
19240 return;
19241 }
19242 } else {
19243 object = VME_OBJECT(map_entry);
19244 if (object == VM_OBJECT_NULL) {
19245 /* nothing mapped here: no need to ask */
19246 *disposition_p = 0;
19247 return;
19248 }
19249 }
19250
19251 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
19252
19253 pmap_disp = 0;
19254
19255 /*
19256 * Query the pmap.
19257 */
19258 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
19259
19260 /*
19261 * Compute this page's disposition.
19262 */
19263 disposition = 0;
19264
19265 /* deal with "alternate accounting" first */
19266 if (!map_entry->is_sub_map &&
19267 object->vo_no_footprint) {
19268 /* does not count in footprint */
19269 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19270 } else if (!map_entry->is_sub_map &&
19271 (object->purgable == VM_PURGABLE_NONVOLATILE ||
19272 (object->purgable == VM_PURGABLE_DENY &&
19273 object->vo_ledger_tag)) &&
19274 VM_OBJECT_OWNER(object) != NULL &&
19275 VM_OBJECT_OWNER(object)->map == map) {
19276 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19277 if ((((curr_s_offset
19278 - map_entry->vme_start
19279 + VME_OFFSET(map_entry))
19280 / effective_page_size) <
19281 (object->resident_page_count +
19282 vm_compressor_pager_get_count(object->pager)))) {
19283 /*
19284 * Non-volatile purgeable object owned
19285 * by this task: report the first
19286 * "#resident + #compressed" pages as
19287 * "resident" (to show that they
19288 * contribute to the footprint) but not
19289 * "dirty" (to avoid double-counting
19290 * with the fake "non-volatile" region
19291 * we'll report at the end of the
19292 * address space to account for all
19293 * (mapped or not) non-volatile memory
19294 * owned by this task.
19295 */
19296 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19297 }
19298 } else if (!map_entry->is_sub_map &&
19299 (object->purgable == VM_PURGABLE_VOLATILE ||
19300 object->purgable == VM_PURGABLE_EMPTY) &&
19301 VM_OBJECT_OWNER(object) != NULL &&
19302 VM_OBJECT_OWNER(object)->map == map) {
19303 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19304 if ((((curr_s_offset
19305 - map_entry->vme_start
19306 + VME_OFFSET(map_entry))
19307 / effective_page_size) <
19308 object->wired_page_count)) {
19309 /*
19310 * Volatile|empty purgeable object owned
19311 * by this task: report the first
19312 * "#wired" pages as "resident" (to
19313 * show that they contribute to the
19314 * footprint) but not "dirty" (to avoid
19315 * double-counting with the fake
19316 * "non-volatile" region we'll report
19317 * at the end of the address space to
19318 * account for all (mapped or not)
19319 * non-volatile memory owned by this
19320 * task.
19321 */
19322 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19323 }
19324 } else if (!map_entry->is_sub_map &&
19325 map_entry->iokit_acct &&
19326 object->internal &&
19327 object->purgable == VM_PURGABLE_DENY) {
19328 /*
19329 * Non-purgeable IOKit memory: phys_footprint
19330 * includes the entire virtual mapping.
19331 */
19332 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19333 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19334 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19335 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
19336 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
19337 /* alternate accounting */
19338 #if __arm64__ && (DEVELOPMENT || DEBUG)
19339 if (map->pmap->footprint_was_suspended) {
19340 /*
19341 * The assertion below can fail if dyld
19342 * suspended footprint accounting
19343 * while doing some adjustments to
19344 * this page; the mapping would say
19345 * "use pmap accounting" but the page
19346 * would be marked "alternate
19347 * accounting".
19348 */
19349 } else
19350 #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
19351 {
19352 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19353 }
19354 disposition = 0;
19355 } else {
19356 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
19357 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19358 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19359 disposition |= VM_PAGE_QUERY_PAGE_REF;
19360 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
19361 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19362 } else {
19363 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19364 }
19365 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
19366 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19367 }
19368 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
19369 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
19370 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19371 }
19372 }
19373
19374 *disposition_p = disposition;
19375 }
19376
19377 kern_return_t
19378 vm_map_page_query_internal(
19379 vm_map_t target_map,
19380 vm_map_offset_t offset,
19381 int *disposition,
19382 int *ref_count)
19383 {
19384 kern_return_t kr;
19385 vm_page_info_basic_data_t info;
19386 mach_msg_type_number_t count;
19387
19388 count = VM_PAGE_INFO_BASIC_COUNT;
19389 kr = vm_map_page_info(target_map,
19390 offset,
19391 VM_PAGE_INFO_BASIC,
19392 (vm_page_info_t) &info,
19393 &count);
19394 if (kr == KERN_SUCCESS) {
19395 *disposition = info.disposition;
19396 *ref_count = info.ref_count;
19397 } else {
19398 *disposition = 0;
19399 *ref_count = 0;
19400 }
19401
19402 return kr;
19403 }
19404
19405 kern_return_t
19406 vm_map_page_info(
19407 vm_map_t map,
19408 vm_map_offset_t offset,
19409 vm_page_info_flavor_t flavor,
19410 vm_page_info_t info,
19411 mach_msg_type_number_t *count)
19412 {
19413 return vm_map_page_range_info_internal(map,
19414 offset, /* start of range */
19415 (offset + 1), /* this will get rounded in the call to the page boundary */
19416 (int)-1, /* effective_page_shift: unspecified */
19417 flavor,
19418 info,
19419 count);
19420 }
19421
19422 kern_return_t
19423 vm_map_page_range_info_internal(
19424 vm_map_t map,
19425 vm_map_offset_t start_offset,
19426 vm_map_offset_t end_offset,
19427 int effective_page_shift,
19428 vm_page_info_flavor_t flavor,
19429 vm_page_info_t info,
19430 mach_msg_type_number_t *count)
19431 {
19432 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
19433 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
19434 vm_page_t m = VM_PAGE_NULL;
19435 kern_return_t retval = KERN_SUCCESS;
19436 int disposition = 0;
19437 int ref_count = 0;
19438 int depth = 0, info_idx = 0;
19439 vm_page_info_basic_t basic_info = 0;
19440 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
19441 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
19442 boolean_t do_region_footprint;
19443 ledger_amount_t ledger_resident, ledger_compressed;
19444 int effective_page_size;
19445 vm_map_offset_t effective_page_mask;
19446
19447 switch (flavor) {
19448 case VM_PAGE_INFO_BASIC:
19449 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
19450 /*
19451 * The "vm_page_info_basic_data" structure was not
19452 * properly padded, so allow the size to be off by
19453 * one to maintain backwards binary compatibility...
19454 */
19455 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
19456 return KERN_INVALID_ARGUMENT;
19457 }
19458 }
19459 break;
19460 default:
19461 return KERN_INVALID_ARGUMENT;
19462 }
19463
19464 if (effective_page_shift == -1) {
19465 effective_page_shift = vm_self_region_page_shift_safely(map);
19466 if (effective_page_shift == -1) {
19467 return KERN_INVALID_ARGUMENT;
19468 }
19469 }
19470 effective_page_size = (1 << effective_page_shift);
19471 effective_page_mask = effective_page_size - 1;
19472
19473 do_region_footprint = task_self_region_footprint();
19474 disposition = 0;
19475 ref_count = 0;
19476 depth = 0;
19477 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
19478 retval = KERN_SUCCESS;
19479
19480 if (__improbable(vm_map_range_overflows(map, start_offset, end_offset - start_offset))) {
19481 return KERN_INVALID_ADDRESS;
19482 }
19483
19484 offset_in_page = start_offset & effective_page_mask;
19485 start = vm_map_trunc_page(start_offset, effective_page_mask);
19486 end = vm_map_round_page(end_offset, effective_page_mask);
19487
19488 if (end < start) {
19489 return KERN_INVALID_ARGUMENT;
19490 }
19491
19492 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
19493
19494 vm_map_lock_read(map);
19495
19496 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
19497
19498 for (curr_s_offset = start; curr_s_offset < end;) {
19499 /*
19500 * New lookup needs reset of these variables.
19501 */
19502 curr_object = object = VM_OBJECT_NULL;
19503 offset_in_object = 0;
19504 ref_count = 0;
19505 depth = 0;
19506
19507 if (do_region_footprint &&
19508 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
19509 /*
19510 * Request for "footprint" info about a page beyond
19511 * the end of address space: this must be for
19512 * the fake region vm_map_region_recurse_64()
19513 * reported to account for non-volatile purgeable
19514 * memory owned by this task.
19515 */
19516 disposition = 0;
19517
19518 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
19519 (unsigned) ledger_compressed) {
19520 /*
19521 * We haven't reported all the "non-volatile
19522 * compressed" pages yet, so report this fake
19523 * page as "compressed".
19524 */
19525 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19526 } else {
19527 /*
19528 * We've reported all the non-volatile
19529 * compressed pages but not all the non-volatile
19530 * pages, so report this fake page as
19531 * "resident dirty".
19532 */
19533 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19534 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19535 disposition |= VM_PAGE_QUERY_PAGE_REF;
19536 }
19537 switch (flavor) {
19538 case VM_PAGE_INFO_BASIC:
19539 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19540 basic_info->disposition = disposition;
19541 basic_info->ref_count = 1;
19542 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19543 basic_info->offset = 0;
19544 basic_info->depth = 0;
19545
19546 info_idx++;
19547 break;
19548 }
19549 curr_s_offset += effective_page_size;
19550 continue;
19551 }
19552
19553 /*
19554 * First, find the map entry covering "curr_s_offset", going down
19555 * submaps if necessary.
19556 */
19557 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
19558 /* no entry -> no object -> no page */
19559
19560 if (curr_s_offset < vm_map_min(map)) {
19561 /*
19562 * Illegal address that falls below map min.
19563 */
19564 curr_e_offset = MIN(end, vm_map_min(map));
19565 } else if (curr_s_offset >= vm_map_max(map)) {
19566 /*
19567 * Illegal address that falls on/after map max.
19568 */
19569 curr_e_offset = end;
19570 } else if (map_entry == vm_map_to_entry(map)) {
19571 /*
19572 * Hit a hole.
19573 */
19574 if (map_entry->vme_next == vm_map_to_entry(map)) {
19575 /*
19576 * Empty map.
19577 */
19578 curr_e_offset = MIN(map->max_offset, end);
19579 } else {
19580 /*
19581 * Hole at start of the map.
19582 */
19583 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19584 }
19585 } else {
19586 if (map_entry->vme_next == vm_map_to_entry(map)) {
19587 /*
19588 * Hole at the end of the map.
19589 */
19590 curr_e_offset = MIN(map->max_offset, end);
19591 } else {
19592 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19593 }
19594 }
19595
19596 assert(curr_e_offset >= curr_s_offset);
19597
19598 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19599
19600 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19601
19602 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19603
19604 curr_s_offset = curr_e_offset;
19605
19606 info_idx += num_pages;
19607
19608 continue;
19609 }
19610
19611 /* compute offset from this map entry's start */
19612 offset_in_object = curr_s_offset - map_entry->vme_start;
19613
19614 /* compute offset into this map entry's object (or submap) */
19615 offset_in_object += VME_OFFSET(map_entry);
19616
19617 if (map_entry->is_sub_map) {
19618 vm_map_t sub_map = VM_MAP_NULL;
19619 vm_page_info_t submap_info = 0;
19620 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19621
19622 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19623
19624 submap_s_offset = offset_in_object;
19625 submap_e_offset = submap_s_offset + range_len;
19626
19627 sub_map = VME_SUBMAP(map_entry);
19628
19629 vm_map_reference(sub_map);
19630 vm_map_unlock_read(map);
19631
19632 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19633
19634 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19635 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19636
19637 retval = vm_map_page_range_info_internal(sub_map,
19638 submap_s_offset,
19639 submap_e_offset,
19640 effective_page_shift,
19641 VM_PAGE_INFO_BASIC,
19642 (vm_page_info_t) submap_info,
19643 count);
19644
19645 assert(retval == KERN_SUCCESS);
19646
19647 vm_map_lock_read(map);
19648 vm_map_deallocate(sub_map);
19649
19650 /* Move the "info" index by the number of pages we inspected.*/
19651 info_idx += range_len >> effective_page_shift;
19652
19653 /* Move our current offset by the size of the range we inspected.*/
19654 curr_s_offset += range_len;
19655
19656 continue;
19657 }
19658
19659 object = VME_OBJECT(map_entry);
19660
19661 if (object == VM_OBJECT_NULL) {
19662 /*
19663 * We don't have an object here and, hence,
19664 * no pages to inspect. We'll fill up the
19665 * info structure appropriately.
19666 */
19667
19668 curr_e_offset = MIN(map_entry->vme_end, end);
19669
19670 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19671
19672 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19673
19674 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19675
19676 curr_s_offset = curr_e_offset;
19677
19678 info_idx += num_pages;
19679
19680 continue;
19681 }
19682
19683 if (do_region_footprint) {
19684 disposition = 0;
19685 if (map->has_corpse_footprint) {
19686 /*
19687 * Query the page info data we saved
19688 * while forking the corpse.
19689 */
19690 vm_map_corpse_footprint_query_page_info(
19691 map,
19692 curr_s_offset,
19693 &disposition);
19694 } else {
19695 /*
19696 * Query the live pmap for footprint info
19697 * about this page.
19698 */
19699 vm_map_footprint_query_page_info(
19700 map,
19701 map_entry,
19702 curr_s_offset,
19703 &disposition);
19704 }
19705 switch (flavor) {
19706 case VM_PAGE_INFO_BASIC:
19707 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19708 basic_info->disposition = disposition;
19709 basic_info->ref_count = 1;
19710 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19711 basic_info->offset = 0;
19712 basic_info->depth = 0;
19713
19714 info_idx++;
19715 break;
19716 }
19717 curr_s_offset += effective_page_size;
19718 continue;
19719 }
19720
19721 vm_object_reference(object);
19722 /*
19723 * Shared mode -- so we can allow other readers
19724 * to grab the lock too.
19725 */
19726 vm_object_lock_shared(object);
19727
19728 curr_e_offset = MIN(map_entry->vme_end, end);
19729
19730 vm_map_unlock_read(map);
19731
19732 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19733
19734 curr_object = object;
19735
19736 for (; curr_s_offset < curr_e_offset;) {
19737 if (object == curr_object) {
19738 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19739 } else {
19740 ref_count = curr_object->ref_count;
19741 }
19742
19743 curr_offset_in_object = offset_in_object;
19744
19745 for (;;) {
19746 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19747
19748 if (m != VM_PAGE_NULL) {
19749 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19750 break;
19751 } else {
19752 if (curr_object->internal &&
19753 curr_object->alive &&
19754 !curr_object->terminating &&
19755 curr_object->pager_ready) {
19756 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19757 == VM_EXTERNAL_STATE_EXISTS) {
19758 /* the pager has that page */
19759 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19760 break;
19761 }
19762 }
19763
19764 /*
19765 * Go down the VM object shadow chain until we find the page
19766 * we're looking for.
19767 */
19768
19769 if (curr_object->shadow != VM_OBJECT_NULL) {
19770 vm_object_t shadow = VM_OBJECT_NULL;
19771
19772 curr_offset_in_object += curr_object->vo_shadow_offset;
19773 shadow = curr_object->shadow;
19774
19775 vm_object_lock_shared(shadow);
19776 vm_object_unlock(curr_object);
19777
19778 curr_object = shadow;
19779 depth++;
19780 continue;
19781 } else {
19782 break;
19783 }
19784 }
19785 }
19786
19787 /* The ref_count is not strictly accurate: it measures the number */
19788 /* of entities holding a ref on the object; they may not be mapping */
19789 /* the object, or may not be mapping the section holding the */
19790 /* target page, but it's still a ballpark number and, though an over- */
19791 /* count, it picks up the copy-on-write cases. */
19792 
19793 /* We could also get a picture of page sharing from pmap_attributes, */
19794 /* but this would under-count as only faulted-in mappings would */
19795 /* show up. */
19796
19797 if ((curr_object == object) && curr_object->shadow) {
19798 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19799 }
19800
19801 if (!curr_object->internal) {
19802 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19803 }
19804
19805 if (m != VM_PAGE_NULL) {
19806 if (m->vmp_fictitious) {
19807 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19808 } else {
19809 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19810 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19811 }
19812
19813 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19814 disposition |= VM_PAGE_QUERY_PAGE_REF;
19815 }
19816
19817 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19818 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19819 }
19820
19821 /*
19822 * XXX TODO4K:
19823 * when this routine deals with 4k
19824 * pages, check the appropriate CS bit
19825 * here.
19826 */
19827 if (m->vmp_cs_validated) {
19828 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19829 }
19830 if (m->vmp_cs_tainted) {
19831 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19832 }
19833 if (m->vmp_cs_nx) {
19834 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19835 }
19836 if (m->vmp_reusable || curr_object->all_reusable) {
19837 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19838 }
19839 }
19840 }
19841
19842 switch (flavor) {
19843 case VM_PAGE_INFO_BASIC:
19844 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19845 basic_info->disposition = disposition;
19846 basic_info->ref_count = ref_count;
19847 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19848 VM_KERNEL_ADDRPERM(curr_object);
19849 basic_info->offset =
19850 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19851 basic_info->depth = depth;
19852
19853 info_idx++;
19854 break;
19855 }
19856
19857 disposition = 0;
19858 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19859
19860 /*
19861 * Move to next offset in the range and in our object.
19862 */
19863 curr_s_offset += effective_page_size;
19864 offset_in_object += effective_page_size;
19865 curr_offset_in_object = offset_in_object;
19866
19867 if (curr_object != object) {
19868 vm_object_unlock(curr_object);
19869
19870 curr_object = object;
19871
19872 vm_object_lock_shared(curr_object);
19873 } else {
19874 vm_object_lock_yield_shared(curr_object);
19875 }
19876 }
19877
19878 vm_object_unlock(curr_object);
19879 vm_object_deallocate(curr_object);
19880
19881 vm_map_lock_read(map);
19882 }
19883
19884 vm_map_unlock_read(map);
19885 return retval;
19886 }
19887
19888 /*
19889 * vm_map_msync
19890 *
19891 * Synchronizes the specified memory range with its backing store
19892 * image by either flushing or cleaning the contents to the appropriate
19893 * memory manager, engaging in a memory object synchronize dialog with
19894 * the manager. The client doesn't return until the manager issues an
19895 * m_o_s_completed message. MIG magically converts the user task parameter
19896 * to the task's address map.
19897 *
19898 * interpretation of sync_flags
19899 * VM_SYNC_INVALIDATE - discard pages, only return precious
19900 * pages to manager.
19901 *
19902 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19903 * - discard pages, write dirty or precious
19904 * pages back to memory manager.
19905 *
19906 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19907 * - write dirty or precious pages back to
19908 * the memory manager.
19909 *
19910 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19911 * is a hole in the region, and we would
19912 * have returned KERN_SUCCESS, return
19913 * KERN_INVALID_ADDRESS instead.
19914 *
19915 * NOTE
19916 * The memory object attributes have not yet been implemented; this
19917 * function will have to deal with the invalidate attribute.
19918 *
19919 * RETURNS
19920 * KERN_INVALID_TASK Bad task parameter
19921 * KERN_INVALID_ARGUMENT both sync and async were specified.
19922 * KERN_SUCCESS The usual.
19923 * KERN_INVALID_ADDRESS There was a hole in the region.
19924 */
19925
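/*
 * Illustrative usage (sketch): flush dirty pages back to the pager
 * synchronously and treat holes in the range as an error.
 *
 *   kr = vm_map_msync(map, addr, len,
 *           VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 */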
19926 kern_return_t
19927 vm_map_msync(
19928 vm_map_t map,
19929 vm_map_address_t address,
19930 vm_map_size_t size,
19931 vm_sync_t sync_flags)
19932 {
19933 vm_map_entry_t entry;
19934 vm_map_size_t amount_left;
19935 vm_object_offset_t offset;
19936 vm_object_offset_t start_offset, end_offset;
19937 boolean_t do_sync_req;
19938 boolean_t had_hole = FALSE;
19939 vm_map_offset_t pmap_offset;
19940
19941 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19942 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19943 return KERN_INVALID_ARGUMENT;
19944 }
19945
19946 if (__improbable(vm_map_range_overflows(map, address, size))) {
19947 return KERN_INVALID_ADDRESS;
19948 }
19949
19950 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19951 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19952 }
19953
19954 /*
19955 * align address and size on page boundaries
19956 */
19957 size = (vm_map_round_page(address + size,
19958 VM_MAP_PAGE_MASK(map)) -
19959 vm_map_trunc_page(address,
19960 VM_MAP_PAGE_MASK(map)));
19961 address = vm_map_trunc_page(address,
19962 VM_MAP_PAGE_MASK(map));
19963
19964 if (map == VM_MAP_NULL) {
19965 return KERN_INVALID_TASK;
19966 }
19967
19968 if (size == 0) {
19969 return KERN_SUCCESS;
19970 }
19971
19972 amount_left = size;
19973
19974 while (amount_left > 0) {
19975 vm_object_size_t flush_size;
19976 vm_object_t object;
19977
19978 vm_map_lock(map);
19979 if (!vm_map_lookup_entry(map,
19980 address,
19981 &entry)) {
19982 vm_map_size_t skip;
19983
19984 /*
19985 * hole in the address map.
19986 */
19987 had_hole = TRUE;
19988
19989 if (sync_flags & VM_SYNC_KILLPAGES) {
19990 /*
19991 * For VM_SYNC_KILLPAGES, there should be
19992 * no holes in the range, since we couldn't
19993 * prevent someone else from allocating in
19994 * that hole and we wouldn't want to "kill"
19995 * their pages.
19996 */
19997 vm_map_unlock(map);
19998 break;
19999 }
20000
20001 /*
20002 * Check for empty map.
20003 */
20004 if (entry == vm_map_to_entry(map) &&
20005 entry->vme_next == entry) {
20006 vm_map_unlock(map);
20007 break;
20008 }
20009 /*
20010 * Check that we don't wrap and that
20011 * we have at least one real map entry.
20012 */
20013 if ((map->hdr.nentries == 0) ||
20014 (entry->vme_next->vme_start < address)) {
20015 vm_map_unlock(map);
20016 break;
20017 }
20018 /*
20019 * Move up to the next entry if needed
20020 */
20021 skip = (entry->vme_next->vme_start - address);
20022 if (skip >= amount_left) {
20023 amount_left = 0;
20024 } else {
20025 amount_left -= skip;
20026 }
20027 address = entry->vme_next->vme_start;
20028 vm_map_unlock(map);
20029 continue;
20030 }
20031
20032 offset = address - entry->vme_start;
20033 pmap_offset = address;
20034
20035 /*
20036 * do we have more to flush than is contained in this
20037 * entry ?
20038 */
20039 if (amount_left + entry->vme_start + offset > entry->vme_end) {
20040 flush_size = entry->vme_end -
20041 (entry->vme_start + offset);
20042 } else {
20043 flush_size = amount_left;
20044 }
20045 amount_left -= flush_size;
20046 address += flush_size;
20047
20048 if (entry->is_sub_map == TRUE) {
20049 vm_map_t local_map;
20050 vm_map_offset_t local_offset;
20051
20052 local_map = VME_SUBMAP(entry);
20053 local_offset = VME_OFFSET(entry);
20054 vm_map_reference(local_map);
20055 vm_map_unlock(map);
20056 if (vm_map_msync(
20057 local_map,
20058 local_offset,
20059 flush_size,
20060 sync_flags) == KERN_INVALID_ADDRESS) {
20061 had_hole = TRUE;
20062 }
20063 vm_map_deallocate(local_map);
20064 continue;
20065 }
20066 object = VME_OBJECT(entry);
20067
20068 /*
20069 * We can't sync this object if the object has not been
20070 * created yet
20071 */
20072 if (object == VM_OBJECT_NULL) {
20073 vm_map_unlock(map);
20074 continue;
20075 }
20076 offset += VME_OFFSET(entry);
20077
20078 vm_object_lock(object);
20079
20080 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
20081 int kill_pages = 0;
20082
20083 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
20084 /*
20085 * This is a destructive operation and so we
20086 * err on the side of limiting the range of
20087 * the operation.
20088 */
20089 start_offset = vm_object_round_page(offset);
20090 end_offset = vm_object_trunc_page(offset + flush_size);
20091
20092 if (end_offset <= start_offset) {
20093 vm_object_unlock(object);
20094 vm_map_unlock(map);
20095 continue;
20096 }
20097
20098 pmap_offset += start_offset - offset;
20099 } else {
20100 start_offset = offset;
20101 end_offset = offset + flush_size;
20102 }
20103
20104 if (sync_flags & VM_SYNC_KILLPAGES) {
20105 if (((object->ref_count == 1) ||
20106 ((object->copy_strategy !=
20107 MEMORY_OBJECT_COPY_SYMMETRIC) &&
20108 (object->copy == VM_OBJECT_NULL))) &&
20109 (object->shadow == VM_OBJECT_NULL)) {
20110 if (object->ref_count != 1) {
20111 vm_page_stats_reusable.free_shared++;
20112 }
20113 kill_pages = 1;
20114 } else {
20115 kill_pages = -1;
20116 }
20117 }
20118 if (kill_pages != -1) {
20119 vm_object_deactivate_pages(
20120 object,
20121 start_offset,
20122 (vm_object_size_t) (end_offset - start_offset),
20123 kill_pages,
20124 FALSE, /* reusable_pages */
20125 FALSE, /* reusable_no_write */
20126 map->pmap,
20127 pmap_offset);
20128 }
20129 vm_object_unlock(object);
20130 vm_map_unlock(map);
20131 continue;
20132 }
20133 /*
20134 * We can't sync this object if there isn't a pager.
20135 * Don't bother to sync internal objects, since there can't
20136 * be any "permanent" storage for these objects anyway.
20137 */
20138 if ((object->pager == MEMORY_OBJECT_NULL) ||
20139 (object->internal) || (object->private)) {
20140 vm_object_unlock(object);
20141 vm_map_unlock(map);
20142 continue;
20143 }
20144 /*
20145 * keep a reference on the object until syncing is done
20146 */
20147 vm_object_reference_locked(object);
20148 vm_object_unlock(object);
20149
20150 vm_map_unlock(map);
20151
20152 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
20153 start_offset = vm_object_trunc_page(offset);
20154 end_offset = vm_object_round_page(offset + flush_size);
20155 } else {
20156 start_offset = offset;
20157 end_offset = offset + flush_size;
20158 }
20159
20160 do_sync_req = vm_object_sync(object,
20161 start_offset,
20162 (end_offset - start_offset),
20163 sync_flags & VM_SYNC_INVALIDATE,
20164 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
20165 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
20166 sync_flags & VM_SYNC_SYNCHRONOUS);
20167
20168 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
20169 /*
20170 * clear out the clustering and read-ahead hints
20171 */
20172 vm_object_lock(object);
20173
20174 object->pages_created = 0;
20175 object->pages_used = 0;
20176 object->sequential = 0;
20177 object->last_alloc = 0;
20178
20179 vm_object_unlock(object);
20180 }
20181 vm_object_deallocate(object);
20182 } /* while */
20183
20184 /* for proper msync() behaviour */
20185 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
20186 return KERN_INVALID_ADDRESS;
20187 }
20188
20189 return KERN_SUCCESS;
20190 }/* vm_msync */
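/*
 * Illustrative note (hedged; the authoritative flag mapping lives in the BSD
 * msync() path, not here): a user-level msync(addr, len, MS_SYNC | MS_INVALIDATE)
 * typically reaches this routine as VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE
 * together with VM_SYNC_CONTIGUOUS, so that a hole anywhere in the requested
 * range is reported as KERN_INVALID_ADDRESS (per the had_hole check above)
 * instead of being silently skipped.
 */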
20191
20192 void
20193 vm_named_entry_associate_vm_object(
20194 vm_named_entry_t named_entry,
20195 vm_object_t object,
20196 vm_object_offset_t offset,
20197 vm_object_size_t size,
20198 vm_prot_t prot)
20199 {
20200 vm_map_copy_t copy;
20201 vm_map_entry_t copy_entry;
20202
20203 assert(!named_entry->is_sub_map);
20204 assert(!named_entry->is_copy);
20205 assert(!named_entry->is_object);
20206 assert(!named_entry->internal);
20207 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
20208
20209 copy = vm_map_copy_allocate(VM_MAP_COPY_ENTRY_LIST);
20210 copy->offset = offset;
20211 copy->size = size;
20212 copy->cpy_hdr.page_shift = (uint16_t)PAGE_SHIFT;
20213
20214 copy_entry = vm_map_copy_entry_create(copy);
20215 copy_entry->protection = prot;
20216 copy_entry->max_protection = prot;
20217 copy_entry->use_pmap = TRUE;
20218 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
20219 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
20220 VME_OBJECT_SET(copy_entry, object, false, 0);
20221 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
20222 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
20223
20224 named_entry->backing.copy = copy;
20225 named_entry->is_object = TRUE;
20226 if (object->internal) {
20227 named_entry->internal = TRUE;
20228 }
20229
20230 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n",
20231 named_entry, copy, object, offset, size, prot);
20232 }
20233
20234 vm_object_t
20235 vm_named_entry_to_vm_object(
20236 vm_named_entry_t named_entry)
20237 {
20238 vm_map_copy_t copy;
20239 vm_map_entry_t copy_entry;
20240 vm_object_t object;
20241
20242 assert(!named_entry->is_sub_map);
20243 assert(!named_entry->is_copy);
20244 assert(named_entry->is_object);
20245 copy = named_entry->backing.copy;
20246 assert(copy != VM_MAP_COPY_NULL);
20247 /*
20248 * Assert that the vm_map_copy is coming from the right
20249 * zone and hasn't been forged
20250 */
20251 vm_map_copy_require(copy);
20252 assert(copy->cpy_hdr.nentries == 1);
20253 copy_entry = vm_map_copy_first_entry(copy);
20254 object = VME_OBJECT(copy_entry);
20255
20256 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
20257
20258 return object;
20259 }
20260
20261 /*
20262 * Routine: convert_port_entry_to_map
20263 * Purpose:
20264 * Convert from a port specifying an entry or a task
20265 * to a map. Doesn't consume the port ref; produces a map ref,
20266 * which may be null. Unlike convert_port_to_map, the
20267 * port may be task-backed or named-entry-backed.
20268 * Conditions:
20269 * Nothing locked.
20270 */
20271
20272 vm_map_t
20273 convert_port_entry_to_map(
20274 ipc_port_t port)
20275 {
20276 vm_map_t map = VM_MAP_NULL;
20277 vm_named_entry_t named_entry;
20278
20279 if (!IP_VALID(port)) {
20280 return VM_MAP_NULL;
20281 }
20282
20283 if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
20284 return convert_port_to_map(port);
20285 }
20286
20287 named_entry = mach_memory_entry_from_port(port);
20288
20289 if ((named_entry->is_sub_map) &&
20290 (named_entry->protection & VM_PROT_WRITE)) {
20291 map = named_entry->backing.map;
20292 if (map->pmap != PMAP_NULL) {
20293 if (map->pmap == kernel_pmap) {
20294 panic("userspace has access "
20295 "to a kernel map %p", map);
20296 }
20297 pmap_require(map->pmap);
20298 }
20299 vm_map_reference(map);
20300 }
20301
20302 return map;
20303 }
20304
20305 /*
20306 * Export routines to other components for the things we access locally through
20307 * macros.
20308 */
20309 #undef current_map
20310 vm_map_t
20311 current_map(void)
20312 {
20313 return current_map_fast();
20314 }
20315
20316 /*
20317 * vm_map_reference:
20318 *
20319 * Takes a reference on the specified map.
20320 */
20321 void
20322 vm_map_reference(
20323 vm_map_t map)
20324 {
20325 if (__probable(map != VM_MAP_NULL)) {
20326 vm_map_require(map);
20327 os_ref_retain_raw(&map->map_refcnt, &map_refgrp);
20328 }
20329 }
20330
20331 /*
20332 * vm_map_deallocate:
20333 *
20334 * Removes a reference from the specified map,
20335 * destroying it if no references remain.
20336 * The map should not be locked.
20337 */
20338 void
20339 vm_map_deallocate(
20340 vm_map_t map)
20341 {
20342 if (__probable(map != VM_MAP_NULL)) {
20343 vm_map_require(map);
20344 if (os_ref_release_raw(&map->map_refcnt, &map_refgrp) == 0) {
20345 vm_map_destroy(map);
20346 }
20347 }
20348 }
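/*
 * Minimal usage sketch (illustrative only): vm_map_reference() and
 * vm_map_deallocate() must be balanced. The submap case in vm_map_msync()
 * above follows this pattern:
 *
 *	vm_map_reference(local_map);	// keep the submap alive across the unlock
 *	vm_map_unlock(map);
 *	... operate on local_map ...
 *	vm_map_deallocate(local_map);	// may destroy the submap if this was
 *					// the last reference
 */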
20349
20350 void
20351 vm_map_inspect_deallocate(
20352 vm_map_inspect_t map)
20353 {
20354 vm_map_deallocate((vm_map_t)map);
20355 }
20356
20357 void
20358 vm_map_read_deallocate(
20359 vm_map_read_t map)
20360 {
20361 vm_map_deallocate((vm_map_t)map);
20362 }
20363
20364
20365 void
20366 vm_map_disable_NX(vm_map_t map)
20367 {
20368 if (map == NULL) {
20369 return;
20370 }
20371 if (map->pmap == NULL) {
20372 return;
20373 }
20374
20375 pmap_disable_NX(map->pmap);
20376 }
20377
20378 void
20379 vm_map_disallow_data_exec(vm_map_t map)
20380 {
20381 if (map == NULL) {
20382 return;
20383 }
20384
20385 map->map_disallow_data_exec = TRUE;
20386 }
20387
20388 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
20389 * more descriptive.
20390 */
20391 void
20392 vm_map_set_32bit(vm_map_t map)
20393 {
20394 #if defined(__arm64__)
20395 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
20396 #else
20397 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
20398 #endif
20399 }
20400
20401
20402 void
20403 vm_map_set_64bit(vm_map_t map)
20404 {
20405 #if defined(__arm64__)
20406 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
20407 #else
20408 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
20409 #endif
20410 }
20411
20412 /*
20413 * Expand the maximum size of an existing map to the maximum supported.
20414 */
20415 void
20416 vm_map_set_jumbo(vm_map_t map)
20417 {
20418 #if defined (__arm64__) && !XNU_TARGET_OS_OSX
20419 vm_map_set_max_addr(map, ~0);
20420 #else /* arm64 */
20421 (void) map;
20422 #endif
20423 }
20424
20425 /*
20426 * This map has a JIT entitlement
20427 */
20428 void
20429 vm_map_set_jit_entitled(vm_map_t map)
20430 {
20431 #if defined (__arm64__)
20432 pmap_set_jit_entitled(map->pmap);
20433 #else /* arm64 */
20434 (void) map;
20435 #endif
20436 }
20437
20438 /*
20439 * Get the status of this map's TPRO flag
20440 */
20441 boolean_t
20442 vm_map_tpro(vm_map_t map)
20443 {
20444 #if defined (__arm64e__)
20445 return pmap_get_tpro(map->pmap);
20446 #else /* arm64e */
20447 (void) map;
20448 return false;
20449 #endif
20450 }
20451
20452 /*
20453 * This map has TPRO enabled
20454 */
20455 void
20456 vm_map_set_tpro(vm_map_t map)
20457 {
20458 #if defined (__arm64e__)
20459 pmap_set_tpro(map->pmap);
20460 #else /* arm64e */
20461 (void) map;
20462 #endif
20463 }
20464
20465 /*
20466 * Expand the maximum size of an existing map.
20467 */
20468 void
20469 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
20470 {
20471 #if defined(__arm64__)
20472 vm_map_offset_t max_supported_offset;
20473 vm_map_offset_t old_max_offset;
20474
20475 vm_map_lock(map);
20476
20477 old_max_offset = map->max_offset;
20478 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
20479
20480 new_max_offset = trunc_page(new_max_offset);
20481
20482 /* The address space cannot be shrunk using this routine. */
20483 if (old_max_offset >= new_max_offset) {
20484 vm_map_unlock(map);
20485 return;
20486 }
20487
20488 if (max_supported_offset < new_max_offset) {
20489 new_max_offset = max_supported_offset;
20490 }
20491
20492 map->max_offset = new_max_offset;
20493
20494 if (map->holelistenabled) {
20495 if (map->holes_list->prev->vme_end == old_max_offset) {
20496 /*
20497 * There is already a hole at the end of the map; simply make it bigger.
20498 */
20499 map->holes_list->prev->vme_end = map->max_offset;
20500 } else {
20501 /*
20502 * There is no hole at the end, so we need to create a new hole
20503 * for the new empty space we're creating.
20504 */
20505 struct vm_map_links *new_hole;
20506
20507 new_hole = zalloc_id(ZONE_ID_VM_MAP_HOLES, Z_WAITOK | Z_NOFAIL);
20508 new_hole->start = old_max_offset;
20509 new_hole->end = map->max_offset;
20510 new_hole->prev = map->holes_list->prev;
20511 new_hole->next = (struct vm_map_entry *)map->holes_list;
20512 map->holes_list->prev->vme_next = (struct vm_map_entry *)new_hole;
20513 map->holes_list->prev = (struct vm_map_entry *)new_hole;
20514 }
20515 }
20516
20517 vm_map_unlock(map);
20518 #else
20519 (void)map;
20520 (void)new_max_offset;
20521 #endif
20522 }
20523
20524 vm_map_offset_t
20525 vm_compute_max_offset(boolean_t is64)
20526 {
20527 #if defined(__arm64__)
20528 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
20529 #else
20530 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
20531 #endif
20532 }
20533
20534 void
20535 vm_map_get_max_aslr_slide_section(
20536 vm_map_t map __unused,
20537 int64_t *max_sections,
20538 int64_t *section_size)
20539 {
20540 #if defined(__arm64__)
20541 *max_sections = 3;
20542 *section_size = ARM_TT_TWIG_SIZE;
20543 #else
20544 *max_sections = 1;
20545 *section_size = 0;
20546 #endif
20547 }
20548
20549 uint64_t
20550 vm_map_get_max_aslr_slide_pages(vm_map_t map)
20551 {
20552 #if defined(__arm64__)
20553 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
20554 * limited embedded address space; this is also meant to minimize pmap
20555 * memory usage on 16KB page systems.
20556 */
20557 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
20558 #else
20559 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20560 #endif
20561 }
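/*
 * Worked example (illustrative): on a 16KB-page map VM_MAP_PAGE_SHIFT(map)
 * is 14, so the cap is 1 << (24 - 14) = 1024 slide slots, i.e. 1024 * 16KB
 * = 16MB of slide range; a 4KB-page map gets 1 << (24 - 12) = 4096 slots of
 * 4KB each, covering the same 16MB.
 */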
20562
20563 uint64_t
20564 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
20565 {
20566 #if defined(__arm64__)
20567 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
20568 * of independent entropy on 16KB page systems.
20569 */
20570 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
20571 #else
20572 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20573 #endif
20574 }
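/*
 * Worked example (illustrative): with 16KB pages (page shift 14) the loader
 * cap is 1 << (22 - 14) = 256 slots, i.e. 8 bits of entropy over a 4MB range;
 * with 4KB pages it is 1 << (22 - 12) = 1024 slots over the same 4MB.
 */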
20575
20576 boolean_t
20577 vm_map_is_64bit(
20578 vm_map_t map)
20579 {
20580 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
20581 }
20582
20583 boolean_t
20584 vm_map_has_hard_pagezero(
20585 vm_map_t map,
20586 vm_map_offset_t pagezero_size)
20587 {
20588 /*
20589 * XXX FBDP
20590 * We should lock the VM map (for read) here but we can get away
20591 * with it for now because there can't really be any race condition:
20592 * the VM map's min_offset is changed only when the VM map is created
20593 * and when the zero page is established (when the binary gets loaded),
20594 * and this routine gets called only when the task terminates and the
20595 * VM map is being torn down, and when a new map is created via
20596 * load_machfile()/execve().
20597 */
20598 return map->min_offset >= pagezero_size;
20599 }
20600
20601 /*
20602 * Raise a VM map's maximum offset.
20603 */
20604 kern_return_t
20605 vm_map_raise_max_offset(
20606 vm_map_t map,
20607 vm_map_offset_t new_max_offset)
20608 {
20609 kern_return_t ret;
20610
20611 vm_map_lock(map);
20612 ret = KERN_INVALID_ADDRESS;
20613
20614 if (new_max_offset >= map->max_offset) {
20615 if (!vm_map_is_64bit(map)) {
20616 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20617 map->max_offset = new_max_offset;
20618 ret = KERN_SUCCESS;
20619 }
20620 } else {
20621 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20622 map->max_offset = new_max_offset;
20623 ret = KERN_SUCCESS;
20624 }
20625 }
20626 }
20627
20628 vm_map_unlock(map);
20629 return ret;
20630 }
20631
20632
20633 /*
20634 * Raise a VM map's minimum offset.
20635 * To strictly enforce "page zero" reservation.
20636 */
20637 kern_return_t
20638 vm_map_raise_min_offset(
20639 vm_map_t map,
20640 vm_map_offset_t new_min_offset)
20641 {
20642 vm_map_entry_t first_entry;
20643
20644 new_min_offset = vm_map_round_page(new_min_offset,
20645 VM_MAP_PAGE_MASK(map));
20646
20647 vm_map_lock(map);
20648
20649 if (new_min_offset < map->min_offset) {
20650 /*
20651 * Can't move min_offset backwards, as that would expose
20652 * a part of the address space that was previously, and for
20653 * possibly good reasons, inaccessible.
20654 */
20655 vm_map_unlock(map);
20656 return KERN_INVALID_ADDRESS;
20657 }
20658 if (new_min_offset >= map->max_offset) {
20659 /* can't go beyond the end of the address space */
20660 vm_map_unlock(map);
20661 return KERN_INVALID_ADDRESS;
20662 }
20663
20664 first_entry = vm_map_first_entry(map);
20665 if (first_entry != vm_map_to_entry(map) &&
20666 first_entry->vme_start < new_min_offset) {
20667 /*
20668 * Some memory was already allocated below the new
20669 * minimum offset. It's too late to change it now...
20670 */
20671 vm_map_unlock(map);
20672 return KERN_NO_SPACE;
20673 }
20674
20675 map->min_offset = new_min_offset;
20676
20677 if (map->holelistenabled) {
20678 assert(map->holes_list);
20679 map->holes_list->start = new_min_offset;
20680 assert(new_min_offset < map->holes_list->end);
20681 }
20682
20683 vm_map_unlock(map);
20684
20685 return KERN_SUCCESS;
20686 }
20687
20688 /*
20689 * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20690 * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20691 * side of the kernel. The limits are checked in the mach VM side, so we keep a copy so we don't
20692 * have to reach over to the BSD data structures.
20693 */
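/*
 * Hypothetical caller sketch (not the actual BSD code path; shown only to
 * illustrate the contract): when the BSD side applies a new RLIMIT_AS value
 * it would forward the ceiling roughly like this, and a limit below the
 * map's current size is rejected rather than applied retroactively:
 *
 *	if (vm_map_set_size_limit(current_map(), new_rlim_cur) != KERN_SUCCESS) {
 *		// the new limit is smaller than the map's current size;
 *		// the caller decides how to report that, the map is untouched
 *	}
 */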
20694
20695 uint64_t vm_map_set_size_limit_count = 0;
20696 kern_return_t
20697 vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
20698 {
20699 kern_return_t kr;
20700
20701 vm_map_lock(map);
20702 if (new_size_limit < map->size) {
20703 /* new limit should not be lower than its current size */
20704 DTRACE_VM2(vm_map_set_size_limit_fail,
20705 vm_map_size_t, map->size,
20706 uint64_t, new_size_limit);
20707 kr = KERN_FAILURE;
20708 } else if (new_size_limit == map->size_limit) {
20709 /* no change */
20710 kr = KERN_SUCCESS;
20711 } else {
20712 /* set new limit */
20713 DTRACE_VM2(vm_map_set_size_limit,
20714 vm_map_size_t, map->size,
20715 uint64_t, new_size_limit);
20716 if (new_size_limit != RLIM_INFINITY) {
20717 vm_map_set_size_limit_count++;
20718 }
20719 map->size_limit = new_size_limit;
20720 kr = KERN_SUCCESS;
20721 }
20722 vm_map_unlock(map);
20723 return kr;
20724 }
20725
20726 uint64_t vm_map_set_data_limit_count = 0;
20727 kern_return_t
20728 vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
20729 {
20730 kern_return_t kr;
20731
20732 vm_map_lock(map);
20733 if (new_data_limit < map->size) {
20734 /* new limit should not be lower than its current size */
20735 DTRACE_VM2(vm_map_set_data_limit_fail,
20736 vm_map_size_t, map->size,
20737 uint64_t, new_data_limit);
20738 kr = KERN_FAILURE;
20739 } else if (new_data_limit == map->data_limit) {
20740 /* no change */
20741 kr = KERN_SUCCESS;
20742 } else {
20743 /* set new limit */
20744 DTRACE_VM2(vm_map_set_data_limit,
20745 vm_map_size_t, map->size,
20746 uint64_t, new_data_limit);
20747 if (new_data_limit != RLIM_INFINITY) {
20748 vm_map_set_data_limit_count++;
20749 }
20750 map->data_limit = new_data_limit;
20751 kr = KERN_SUCCESS;
20752 }
20753 vm_map_unlock(map);
20754 return kr;
20755 }
20756
20757 void
20758 vm_map_set_user_wire_limit(vm_map_t map,
20759 vm_size_t limit)
20760 {
20761 vm_map_lock(map);
20762 map->user_wire_limit = limit;
20763 vm_map_unlock(map);
20764 }
20765
20766
20767 void
20768 vm_map_switch_protect(vm_map_t map,
20769 boolean_t val)
20770 {
20771 vm_map_lock(map);
20772 map->switch_protect = val;
20773 vm_map_unlock(map);
20774 }
20775
20776 extern int cs_process_enforcement_enable;
20777 boolean_t
20778 vm_map_cs_enforcement(
20779 vm_map_t map)
20780 {
20781 if (cs_process_enforcement_enable) {
20782 return TRUE;
20783 }
20784 return map->cs_enforcement;
20785 }
20786
20787 kern_return_t
20788 vm_map_cs_wx_enable(
20789 __unused vm_map_t map)
20790 {
20791 #if CODE_SIGNING_MONITOR
20792 kern_return_t ret = csm_allow_invalid_code(vm_map_pmap(map));
20793 if ((ret == KERN_SUCCESS) || (ret == KERN_NOT_SUPPORTED)) {
20794 return KERN_SUCCESS;
20795 }
20796 return ret;
20797 #else
20798 /* The VM manages WX memory entirely on its own */
20799 return KERN_SUCCESS;
20800 #endif
20801 }
20802
20803 void
20804 vm_map_cs_debugged_set(
20805 vm_map_t map,
20806 boolean_t val)
20807 {
20808 vm_map_lock(map);
20809 map->cs_debugged = val;
20810 vm_map_unlock(map);
20811 }
20812
20813 void
20814 vm_map_cs_enforcement_set(
20815 vm_map_t map,
20816 boolean_t val)
20817 {
20818 vm_map_lock(map);
20819 map->cs_enforcement = val;
20820 pmap_set_vm_map_cs_enforced(map->pmap, val);
20821 vm_map_unlock(map);
20822 }
20823
20824 /*
20825 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20826 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20827 * bump both counters.
20828 */
20829 void
20830 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20831 {
20832 pmap_t pmap = vm_map_pmap(map);
20833
20834 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20835 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20836 }
20837
20838 void
20839 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20840 {
20841 pmap_t pmap = vm_map_pmap(map);
20842
20843 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20844 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20845 }
20846
20847 /* Add (generate) code signature for memory range */
20848 #if CONFIG_DYNAMIC_CODE_SIGNING
20849 kern_return_t
20850 vm_map_sign(vm_map_t map,
20851 vm_map_offset_t start,
20852 vm_map_offset_t end)
20853 {
20854 vm_map_entry_t entry;
20855 vm_page_t m;
20856 vm_object_t object;
20857
20858 /*
20859 * Vet all the input parameters and current type and state of the
20860 * underlying object. Return with an error if anything is amiss.
20861 */
20862 if (map == VM_MAP_NULL) {
20863 return KERN_INVALID_ARGUMENT;
20864 }
20865
20866 if (__improbable(vm_map_range_overflows(map, start, end - start))) {
20867 return KERN_INVALID_ADDRESS;
20868 }
20869
20870 vm_map_lock_read(map);
20871
20872 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20873 /*
20874 * Must pass a valid non-submap address.
20875 */
20876 vm_map_unlock_read(map);
20877 return KERN_INVALID_ADDRESS;
20878 }
20879
20880 if ((entry->vme_start > start) || (entry->vme_end < end)) {
20881 /*
20882 * Map entry doesn't cover the requested range. Not handling
20883 * this situation currently.
20884 */
20885 vm_map_unlock_read(map);
20886 return KERN_INVALID_ARGUMENT;
20887 }
20888
20889 object = VME_OBJECT(entry);
20890 if (object == VM_OBJECT_NULL) {
20891 /*
20892 * Object must already be present or we can't sign.
20893 */
20894 vm_map_unlock_read(map);
20895 return KERN_INVALID_ARGUMENT;
20896 }
20897
20898 vm_object_lock(object);
20899 vm_map_unlock_read(map);
20900
20901 while (start < end) {
20902 uint32_t refmod;
20903
20904 m = vm_page_lookup(object,
20905 start - entry->vme_start + VME_OFFSET(entry));
20906 if (m == VM_PAGE_NULL) {
20907 /* should we try to fault a page here? we can probably
20908 * demand it exists and is locked for this request */
20909 vm_object_unlock(object);
20910 return KERN_FAILURE;
20911 }
20912 /* deal with special page status */
20913 if (m->vmp_busy ||
20914 (m->vmp_unusual && (VMP_ERROR_GET(m) || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20915 vm_object_unlock(object);
20916 return KERN_FAILURE;
20917 }
20918
20919 /* Page is OK... now "validate" it */
20920 /* This is the place where we'll call out to create a code
20921 * directory, later */
20922 /* XXX TODO4K: deal with 4k subpages individually? */
20923 m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20924
20925 /* The page is now "clean" for codesigning purposes. That means
20926 * we don't consider it as modified (wpmapped) anymore. But
20927 * we'll disconnect the page so we note any future modification
20928 * attempts. */
20929 m->vmp_wpmapped = FALSE;
20930 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20931
20932 /* Pull the dirty status from the pmap, since we cleared the
20933 * wpmapped bit */
20934 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20935 SET_PAGE_DIRTY(m, FALSE);
20936 }
20937
20938 /* On to the next page */
20939 start += PAGE_SIZE;
20940 }
20941 vm_object_unlock(object);
20942
20943 return KERN_SUCCESS;
20944 }
20945 #endif
20946
20947 kern_return_t
20948 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20949 {
20950 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20951 vm_map_entry_t next_entry;
20952 kern_return_t kr = KERN_SUCCESS;
20953 VM_MAP_ZAP_DECLARE(zap_list);
20954
20955 vm_map_lock(map);
20956
20957 for (entry = vm_map_first_entry(map);
20958 entry != vm_map_to_entry(map);
20959 entry = next_entry) {
20960 next_entry = entry->vme_next;
20961
20962 if (!entry->is_sub_map &&
20963 VME_OBJECT(entry) &&
20964 (VME_OBJECT(entry)->internal == TRUE) &&
20965 (VME_OBJECT(entry)->ref_count == 1)) {
20966 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20967 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20968
20969 (void)vm_map_delete(map, entry->vme_start,
20970 entry->vme_end, VM_MAP_REMOVE_NO_YIELD,
20971 KMEM_GUARD_NONE, &zap_list);
20972 }
20973 }
20974
20975 vm_map_unlock(map);
20976
20977 vm_map_zap_dispose(&zap_list);
20978
20979 return kr;
20980 }
20981
20982
20983 #if DEVELOPMENT || DEBUG
20984
20985 int
20986 vm_map_disconnect_page_mappings(
20987 vm_map_t map,
20988 boolean_t do_unnest)
20989 {
20990 vm_map_entry_t entry;
20991 ledger_amount_t byte_count = 0;
20992
20993 if (do_unnest == TRUE) {
20994 #ifndef NO_NESTED_PMAP
20995 vm_map_lock(map);
20996
20997 for (entry = vm_map_first_entry(map);
20998 entry != vm_map_to_entry(map);
20999 entry = entry->vme_next) {
21000 if (entry->is_sub_map && entry->use_pmap) {
21001 /*
21002 * Make sure the range between the start of this entry and
21003 * the end of this entry is no longer nested, so that
21004 * we will only remove mappings from the pmap in use by
21005 * this task.
21006 */
21007 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
21008 }
21009 }
21010 vm_map_unlock(map);
21011 #endif
21012 }
21013 vm_map_lock_read(map);
21014
21015 ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);
21016
21017 for (entry = vm_map_first_entry(map);
21018 entry != vm_map_to_entry(map);
21019 entry = entry->vme_next) {
21020 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
21021 (VME_OBJECT(entry)->phys_contiguous))) {
21022 continue;
21023 }
21024 if (entry->is_sub_map) {
21025 assert(!entry->use_pmap);
21026 }
21027
21028 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
21029 }
21030 vm_map_unlock_read(map);
21031
21032 return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
21033 }
21034
21035 kern_return_t
21036 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
21037 {
21038 vm_object_t object = NULL;
21039 vm_object_offset_t offset;
21040 vm_prot_t prot;
21041 boolean_t wired;
21042 vm_map_version_t version;
21043 vm_map_t real_map;
21044 int result = KERN_FAILURE;
21045
21046 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
21047 vm_map_lock(map);
21048
21049 result = vm_map_lookup_and_lock_object(&map, vaddr, VM_PROT_READ,
21050 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
21051 NULL, &real_map, NULL);
21052 if (object == NULL) {
21053 result = KERN_MEMORY_ERROR;
21054 } else if (object->pager) {
21055 result = vm_compressor_pager_inject_error(object->pager,
21056 offset);
21057 } else {
21058 result = KERN_MEMORY_PRESENT;
21059 }
21060
21061 if (object != NULL) {
21062 vm_object_unlock(object);
21063 }
21064
21065 if (real_map != map) {
21066 vm_map_unlock(real_map);
21067 }
21068 vm_map_unlock(map);
21069
21070 return result;
21071 }
21072
21073 #endif
21074
21075
21076 #if CONFIG_FREEZE
21077
21078
21079 extern struct freezer_context freezer_context_global;
21080 AbsoluteTime c_freezer_last_yield_ts = 0;
21081
21082 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
21083 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
21084
21085 kern_return_t
21086 vm_map_freeze(
21087 task_t task,
21088 unsigned int *purgeable_count,
21089 unsigned int *wired_count,
21090 unsigned int *clean_count,
21091 unsigned int *dirty_count,
21092 unsigned int dirty_budget,
21093 unsigned int *shared_count,
21094 int *freezer_error_code,
21095 boolean_t eval_only)
21096 {
21097 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
21098 kern_return_t kr = KERN_SUCCESS;
21099 boolean_t evaluation_phase = TRUE;
21100 vm_object_t cur_shared_object = NULL;
21101 int cur_shared_obj_ref_cnt = 0;
21102 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
21103
21104 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
21105
21106 /*
21107 * We need the exclusive lock here so that we can
21108 * block any page faults or lookups while we are
21109 * in the middle of freezing this vm map.
21110 */
21111 vm_map_t map = task->map;
21112
21113 vm_map_lock(map);
21114
21115 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
21116
21117 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
21118 if (vm_compressor_low_on_space()) {
21119 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
21120 }
21121
21122 if (vm_swap_low_on_space()) {
21123 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
21124 }
21125
21126 kr = KERN_NO_SPACE;
21127 goto done;
21128 }
21129
21130 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
21131 /*
21132 * In-memory compressor backing the freezer. No disk.
21133 * So no need to do the evaluation phase.
21134 */
21135 evaluation_phase = FALSE;
21136
21137 if (eval_only == TRUE) {
21138 /*
21139 * We don't support 'eval_only' mode
21140 * in this non-swap config.
21141 */
21142 *freezer_error_code = FREEZER_ERROR_GENERIC;
21143 kr = KERN_INVALID_ARGUMENT;
21144 goto done;
21145 }
21146
21147 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
21148 clock_get_uptime(&c_freezer_last_yield_ts);
21149 }
21150 again:
21151
21152 for (entry2 = vm_map_first_entry(map);
21153 entry2 != vm_map_to_entry(map);
21154 entry2 = entry2->vme_next) {
21155 vm_object_t src_object;
21156
21157 if (entry2->is_sub_map) {
21158 continue;
21159 }
21160
21161 src_object = VME_OBJECT(entry2);
21162 if (!src_object ||
21163 src_object->phys_contiguous ||
21164 !src_object->internal) {
21165 continue;
21166 }
21167
21168 /* If eligible, scan the entry, moving eligible pages over to our parent object */
21169
21170 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
21171 /*
21172 * We skip purgeable objects during evaluation phase only.
21173 * If we decide to freeze this process, we'll explicitly
21174 * purge these objects before we go around again with
21175 * 'evaluation_phase' set to FALSE.
21176 */
21177
21178 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
21179 /*
21180 * We want to purge objects that may not belong to this task but are mapped
21181 * in this task alone. Since we already purged this task's purgeable memory
21182 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
21183 * on this task's purgeable objects. Hence the check for only volatile objects.
21184 */
21185 if (evaluation_phase == FALSE &&
21186 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
21187 (src_object->ref_count == 1)) {
21188 vm_object_lock(src_object);
21189 vm_object_purge(src_object, 0);
21190 vm_object_unlock(src_object);
21191 }
21192 continue;
21193 }
21194
21195 /*
21196 * Pages belonging to this object could be swapped to disk.
21197 * Make sure it's not a shared object because we could end
21198 * up just bringing it back in again.
21199 *
21200 * We try to optimize somewhat by checking for objects that are mapped
21201 * more than once within our own map. But we don't do full searches,
21202 * we just look at the entries following our current entry.
21203 */
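/*
 * Illustrative example of the heuristic above (assumed scenario): an object
 * with ref_count == 3 mapped by three consecutive entries of this map is
 * first charged to dirty_shared_count; the second entry only bumps
 * cur_shared_obj_ref_cnt; on the third, cur_shared_obj_ref_cnt reaches
 * ref_count, so the snapshot moves from the shared total to the private
 * total and the object is treated as private from then on.
 */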
21204
21205 if (src_object->ref_count > 1) {
21206 if (src_object != cur_shared_object) {
21207 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
21208 dirty_shared_count += obj_pages_snapshot;
21209
21210 cur_shared_object = src_object;
21211 cur_shared_obj_ref_cnt = 1;
21212 continue;
21213 } else {
21214 cur_shared_obj_ref_cnt++;
21215 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
21216 /*
21217 * Fall through to below and treat this object as private.
21218 * So deduct its pages from our shared total and add it to the
21219 * private total.
21220 */
21221
21222 dirty_shared_count -= obj_pages_snapshot;
21223 dirty_private_count += obj_pages_snapshot;
21224 } else {
21225 continue;
21226 }
21227 }
21228 }
21229
21230
21231 if (src_object->ref_count == 1) {
21232 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
21233 }
21234
21235 if (evaluation_phase == TRUE) {
21236 continue;
21237 }
21238 }
21239
21240 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
21241 *wired_count += src_object->wired_page_count;
21242
21243 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
21244 if (vm_compressor_low_on_space()) {
21245 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
21246 }
21247
21248 if (vm_swap_low_on_space()) {
21249 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
21250 }
21251
21252 kr = KERN_NO_SPACE;
21253 break;
21254 }
21255 if (paged_out_count >= dirty_budget) {
21256 break;
21257 }
21258 dirty_budget -= paged_out_count;
21259 }
21260
21261 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
21262 if (evaluation_phase) {
21263 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
21264
21265 if (dirty_shared_count > shared_pages_threshold) {
21266 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
21267 kr = KERN_FAILURE;
21268 goto done;
21269 }
21270
21271 if (dirty_shared_count &&
21272 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
21273 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
21274 kr = KERN_FAILURE;
21275 goto done;
21276 }
21277
21278 evaluation_phase = FALSE;
21279 dirty_shared_count = dirty_private_count = 0;
21280
21281 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
21282 clock_get_uptime(&c_freezer_last_yield_ts);
21283
21284 if (eval_only) {
21285 kr = KERN_SUCCESS;
21286 goto done;
21287 }
21288
21289 vm_purgeable_purge_task_owned(task);
21290
21291 goto again;
21292 } else {
21293 kr = KERN_SUCCESS;
21294 }
21295
21296 done:
21297 vm_map_unlock(map);
21298
21299 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
21300 vm_object_compressed_freezer_done();
21301 }
21302 return kr;
21303 }
21304
21305 #endif
21306
21307 /*
21308 * vm_map_entry_should_cow_for_true_share:
21309 *
21310 * Determines if the map entry should be clipped and setup for copy-on-write
21311 * to avoid applying "true_share" to a large VM object when only a subset is
21312 * targeted.
21313 *
21314 * For now, we target only the map entries created for the Objective C
21315 * Garbage Collector, which initially have the following properties:
21316 * - alias == VM_MEMORY_MALLOC
21317 * - wired_count == 0
21318 * - !needs_copy
21319 * and a VM object with:
21320 * - internal
21321 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
21322 * - !true_share
21323 * - vo_size == ANON_CHUNK_SIZE
21324 *
21325 * Only non-kernel map entries.
21326 */
21327 boolean_t
21328 vm_map_entry_should_cow_for_true_share(
21329 vm_map_entry_t entry)
21330 {
21331 vm_object_t object;
21332
21333 if (entry->is_sub_map) {
21334 /* entry does not point at a VM object */
21335 return FALSE;
21336 }
21337
21338 if (entry->needs_copy) {
21339 /* already set for copy_on_write: done! */
21340 return FALSE;
21341 }
21342
21343 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
21344 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
21345 /* not a malloc heap or Obj-C Garbage Collector heap */
21346 return FALSE;
21347 }
21348
21349 if (entry->wired_count) {
21350 /* wired: can't change the map entry... */
21351 vm_counters.should_cow_but_wired++;
21352 return FALSE;
21353 }
21354
21355 object = VME_OBJECT(entry);
21356
21357 if (object == VM_OBJECT_NULL) {
21358 /* no object yet... */
21359 return FALSE;
21360 }
21361
21362 if (!object->internal) {
21363 /* not an internal object */
21364 return FALSE;
21365 }
21366
21367 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
21368 /* not the default copy strategy */
21369 return FALSE;
21370 }
21371
21372 if (object->true_share) {
21373 /* already true_share: too late to avoid it */
21374 return FALSE;
21375 }
21376
21377 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
21378 object->vo_size != ANON_CHUNK_SIZE) {
21379 /* ... not an object created for the ObjC Garbage Collector */
21380 return FALSE;
21381 }
21382
21383 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
21384 object->vo_size != 2048 * 4096) {
21385 /* ... not a "MALLOC_SMALL" heap */
21386 return FALSE;
21387 }
21388
21389 /*
21390 * All the criteria match: we have a large object being targeted for "true_share".
21391 * To limit the adverse side-effects linked with "true_share", tell the caller to
21392 * try and avoid setting up the entire object for "true_share" by clipping the
21393 * targeted range and setting it up for copy-on-write.
21394 */
21395 return TRUE;
21396 }
21397
21398 uint64_t vm_map_range_overflows_count = 0;
21399 TUNABLE_WRITEABLE(boolean_t, vm_map_range_overflows_log, "vm_map_range_overflows_log", FALSE);
21400 bool
21401 vm_map_range_overflows(
21402 vm_map_t map,
21403 vm_map_offset_t addr,
21404 vm_map_size_t size)
21405 {
21406 vm_map_offset_t start, end, sum;
21407 vm_map_offset_t pgmask;
21408
21409 if (size == 0) {
21410 /* empty range -> no overflow */
21411 return false;
21412 }
21413 pgmask = vm_map_page_mask(map);
21414 start = vm_map_trunc_page_mask(addr, pgmask);
21415 end = vm_map_round_page_mask(addr + size, pgmask);
21416 if (__improbable(os_add_overflow(addr, size, &sum) || end <= start)) {
21417 vm_map_range_overflows_count++;
21418 if (vm_map_range_overflows_log) {
21419 printf("%d[%s] vm_map_range_overflows addr 0x%llx size 0x%llx pgmask 0x%llx\n",
21420 proc_selfpid(),
21421 proc_best_name(current_proc()),
21422 (uint64_t)addr,
21423 (uint64_t)size,
21424 (uint64_t)pgmask);
21425 }
21426 DTRACE_VM4(vm_map_range_overflows,
21427 vm_map_t, map,
21428 uint32_t, pgmask,
21429 uint64_t, (uint64_t)addr,
21430 uint64_t, (uint64_t)size);
21431 return true;
21432 }
21433 return false;
21434 }
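/*
 * Worked example (illustrative): on a 4KB-page map (pgmask 0xFFF), a request
 * with addr = 0xFFFFFFFFFFFFF000 and size = 0x2000 wraps when addr + size is
 * computed, so os_add_overflow() trips and the range is rejected. A request
 * with addr = 0x1FFF and size = 1 is fine: it truncates/rounds to the single
 * page [0x1000, 0x2000) and returns false.
 */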
21435
21436 vm_map_offset_t
21437 vm_map_round_page_mask(
21438 vm_map_offset_t offset,
21439 vm_map_offset_t mask)
21440 {
21441 return VM_MAP_ROUND_PAGE(offset, mask);
21442 }
21443
21444 vm_map_offset_t
21445 vm_map_trunc_page_mask(
21446 vm_map_offset_t offset,
21447 vm_map_offset_t mask)
21448 {
21449 return VM_MAP_TRUNC_PAGE(offset, mask);
21450 }
21451
21452 boolean_t
21453 vm_map_page_aligned(
21454 vm_map_offset_t offset,
21455 vm_map_offset_t mask)
21456 {
21457 return ((offset) & mask) == 0;
21458 }
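/*
 * Worked example (illustrative), with mask = 0x3FFF (16KB pages):
 *	vm_map_trunc_page_mask(0x14001, 0x3FFF) == 0x14000
 *	vm_map_round_page_mask(0x14001, 0x3FFF) == 0x18000
 *	vm_map_page_aligned(0x14000, 0x3FFF) == TRUE
 */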
21459
21460 int
21461 vm_map_page_shift(
21462 vm_map_t map)
21463 {
21464 return VM_MAP_PAGE_SHIFT(map);
21465 }
21466
21467 int
21468 vm_map_page_size(
21469 vm_map_t map)
21470 {
21471 return VM_MAP_PAGE_SIZE(map);
21472 }
21473
21474 vm_map_offset_t
21475 vm_map_page_mask(
21476 vm_map_t map)
21477 {
21478 return VM_MAP_PAGE_MASK(map);
21479 }
21480
21481 kern_return_t
21482 vm_map_set_page_shift(
21483 vm_map_t map,
21484 int pageshift)
21485 {
21486 if (map->hdr.nentries != 0) {
21487 /* too late to change page size */
21488 return KERN_FAILURE;
21489 }
21490
21491 map->hdr.page_shift = (uint16_t)pageshift;
21492
21493 return KERN_SUCCESS;
21494 }
21495
21496 kern_return_t
21497 vm_map_query_volatile(
21498 vm_map_t map,
21499 mach_vm_size_t *volatile_virtual_size_p,
21500 mach_vm_size_t *volatile_resident_size_p,
21501 mach_vm_size_t *volatile_compressed_size_p,
21502 mach_vm_size_t *volatile_pmap_size_p,
21503 mach_vm_size_t *volatile_compressed_pmap_size_p)
21504 {
21505 mach_vm_size_t volatile_virtual_size;
21506 mach_vm_size_t volatile_resident_count;
21507 mach_vm_size_t volatile_compressed_count;
21508 mach_vm_size_t volatile_pmap_count;
21509 mach_vm_size_t volatile_compressed_pmap_count;
21510 mach_vm_size_t resident_count;
21511 vm_map_entry_t entry;
21512 vm_object_t object;
21513
21514 /* map should be locked by caller */
21515
21516 volatile_virtual_size = 0;
21517 volatile_resident_count = 0;
21518 volatile_compressed_count = 0;
21519 volatile_pmap_count = 0;
21520 volatile_compressed_pmap_count = 0;
21521
21522 for (entry = vm_map_first_entry(map);
21523 entry != vm_map_to_entry(map);
21524 entry = entry->vme_next) {
21525 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
21526
21527 if (entry->is_sub_map) {
21528 continue;
21529 }
21530 if (!(entry->protection & VM_PROT_WRITE)) {
21531 continue;
21532 }
21533 object = VME_OBJECT(entry);
21534 if (object == VM_OBJECT_NULL) {
21535 continue;
21536 }
21537 if (object->purgable != VM_PURGABLE_VOLATILE &&
21538 object->purgable != VM_PURGABLE_EMPTY) {
21539 continue;
21540 }
21541 if (VME_OFFSET(entry)) {
21542 /*
21543 * If the map entry has been split and the object now
21544 * appears several times in the VM map, we don't want
21545 * to count the object's resident_page_count more than
21546 * once. We count it only for the first one, starting
21547 * at offset 0 and ignore the other VM map entries.
21548 */
21549 continue;
21550 }
21551 resident_count = object->resident_page_count;
21552 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
21553 resident_count = 0;
21554 } else {
21555 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
21556 }
21557
21558 volatile_virtual_size += entry->vme_end - entry->vme_start;
21559 volatile_resident_count += resident_count;
21560 if (object->pager) {
21561 volatile_compressed_count +=
21562 vm_compressor_pager_get_count(object->pager);
21563 }
21564 pmap_compressed_bytes = 0;
21565 pmap_resident_bytes =
21566 pmap_query_resident(map->pmap,
21567 entry->vme_start,
21568 entry->vme_end,
21569 &pmap_compressed_bytes);
21570 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
21571 volatile_compressed_pmap_count += (pmap_compressed_bytes
21572 / PAGE_SIZE);
21573 }
21574
21575 /* map is still locked on return */
21576
21577 *volatile_virtual_size_p = volatile_virtual_size;
21578 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
21579 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
21580 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
21581 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
21582
21583 return KERN_SUCCESS;
21584 }
21585
21586 void
21587 vm_map_sizes(vm_map_t map,
21588 vm_map_size_t * psize,
21589 vm_map_size_t * pfree,
21590 vm_map_size_t * plargest_free)
21591 {
21592 vm_map_entry_t entry;
21593 vm_map_offset_t prev;
21594 vm_map_size_t free, total_free, largest_free;
21595 boolean_t end;
21596
21597 if (!map) {
21598 *psize = *pfree = *plargest_free = 0;
21599 return;
21600 }
21601 total_free = largest_free = 0;
21602
21603 vm_map_lock_read(map);
21604 if (psize) {
21605 *psize = map->max_offset - map->min_offset;
21606 }
21607
21608 prev = map->min_offset;
21609 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21610 end = (entry == vm_map_to_entry(map));
21611
21612 if (end) {
21613 free = entry->vme_end - prev;
21614 } else {
21615 free = entry->vme_start - prev;
21616 }
21617
21618 total_free += free;
21619 if (free > largest_free) {
21620 largest_free = free;
21621 }
21622
21623 if (end) {
21624 break;
21625 }
21626 prev = entry->vme_end;
21627 }
21628 vm_map_unlock_read(map);
21629 if (pfree) {
21630 *pfree = total_free;
21631 }
21632 if (plargest_free) {
21633 *plargest_free = largest_free;
21634 }
21635 }
21636
21637 #if VM_SCAN_FOR_SHADOW_CHAIN
21638 int vm_map_shadow_max(vm_map_t map);
21639 int
21640 vm_map_shadow_max(
21641 vm_map_t map)
21642 {
21643 int shadows, shadows_max;
21644 vm_map_entry_t entry;
21645 vm_object_t object, next_object;
21646
21647 if (map == NULL) {
21648 return 0;
21649 }
21650
21651 shadows_max = 0;
21652
21653 vm_map_lock_read(map);
21654
21655 for (entry = vm_map_first_entry(map);
21656 entry != vm_map_to_entry(map);
21657 entry = entry->vme_next) {
21658 if (entry->is_sub_map) {
21659 continue;
21660 }
21661 object = VME_OBJECT(entry);
21662 if (object == NULL) {
21663 continue;
21664 }
21665 vm_object_lock_shared(object);
21666 for (shadows = 0;
21667 object->shadow != NULL;
21668 shadows++, object = next_object) {
21669 next_object = object->shadow;
21670 vm_object_lock_shared(next_object);
21671 vm_object_unlock(object);
21672 }
21673 vm_object_unlock(object);
21674 if (shadows > shadows_max) {
21675 shadows_max = shadows;
21676 }
21677 }
21678
21679 vm_map_unlock_read(map);
21680
21681 return shadows_max;
21682 }
21683 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21684
21685 void
21686 vm_commit_pagezero_status(vm_map_t lmap)
21687 {
21688 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21689 }
21690
21691 #if XNU_TARGET_OS_OSX
21692 void
21693 vm_map_set_high_start(
21694 vm_map_t map,
21695 vm_map_offset_t high_start)
21696 {
21697 map->vmmap_high_start = high_start;
21698 }
21699 #endif /* XNU_TARGET_OS_OSX */
21700
21701 #if CODE_SIGNING_MONITOR
21702
21703 kern_return_t
21704 vm_map_entry_cs_associate(
21705 vm_map_t map,
21706 vm_map_entry_t entry,
21707 vm_map_kernel_flags_t vmk_flags)
21708 {
21709 vm_object_t cs_object, cs_shadow, backing_object;
21710 vm_object_offset_t cs_offset, backing_offset;
21711 void *cs_blobs;
21712 struct vnode *cs_vnode;
21713 kern_return_t cs_ret;
21714
21715 if (map->pmap == NULL ||
21716 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
21717 (csm_address_space_exempt(map->pmap) == KERN_SUCCESS) ||
21718 VME_OBJECT(entry) == VM_OBJECT_NULL) {
21719 return KERN_SUCCESS;
21720 }
21721
21722 if (!(entry->protection & VM_PROT_EXECUTE)) {
21723 /*
21724 * This memory region is not executable, so the code-signing
21725 * monitor would usually not care about it...
21726 */
21727 if (vmk_flags.vmkf_remap_prot_copy &&
21728 (entry->max_protection & VM_PROT_EXECUTE)) {
21729 /*
21730 * ... except if the memory region is being remapped
21731 * from r-x/r-x to rw-/rwx via vm_protect(VM_PROT_COPY)
21732 * which is what a debugger or dtrace would be doing
21733 * to prepare to modify an executable page to insert
21734 * a breakpoint or activate a probe.
21735 * In that case, fall through so that we can mark
21736 * this region as being "debugged" and no longer
21737 * strictly code-signed.
21738 */
21739 } else {
21740 /*
21741 * Really not executable, so no need to tell the
21742 * code-signing monitor.
21743 */
21744 return KERN_SUCCESS;
21745 }
21746 }
21747
21748 vm_map_lock_assert_exclusive(map);
21749
21750 if (entry->used_for_jit) {
21751 cs_ret = csm_associate_jit_region(
21752 map->pmap,
21753 entry->vme_start,
21754 entry->vme_end - entry->vme_start);
21755 goto done;
21756 }
21757
21758 if (vmk_flags.vmkf_remap_prot_copy) {
21759 cs_ret = csm_associate_debug_region(
21760 map->pmap,
21761 entry->vme_start,
21762 entry->vme_end - entry->vme_start);
21763 if (cs_ret == KERN_SUCCESS) {
21764 entry->vme_xnu_user_debug = TRUE;
21765 }
21766 #if DEVELOPMENT || DEBUG
21767 if (vm_log_xnu_user_debug) {
21768 printf("FBDP %d[%s] %s:%d map %p entry %p [ 0x%llx 0x%llx ] vme_xnu_user_debug=%d cs_ret %d\n",
21769 proc_selfpid(),
21770 (get_bsdtask_info(current_task()) ? proc_name_address(get_bsdtask_info(current_task())) : "?"),
21771 __FUNCTION__, __LINE__,
21772 map, entry,
21773 (uint64_t)entry->vme_start, (uint64_t)entry->vme_end,
21774 entry->vme_xnu_user_debug,
21775 cs_ret);
21776 }
21777 #endif /* DEVELOPMENT || DEBUG */
21778 goto done;
21779 }
21780
21781 cs_object = VME_OBJECT(entry);
21782 vm_object_lock_shared(cs_object);
21783 cs_offset = VME_OFFSET(entry);
21784
21785 /* find the VM object backed by the code-signed vnode */
21786 for (;;) {
21787 /* go to the bottom of cs_object's shadow chain */
21788 for (;
21789 cs_object->shadow != VM_OBJECT_NULL;
21790 cs_object = cs_shadow) {
21791 cs_shadow = cs_object->shadow;
21792 cs_offset += cs_object->vo_shadow_offset;
21793 vm_object_lock_shared(cs_shadow);
21794 vm_object_unlock(cs_object);
21795 }
21796 if (cs_object->internal ||
21797 cs_object->pager == MEMORY_OBJECT_NULL) {
21798 vm_object_unlock(cs_object);
21799 return KERN_SUCCESS;
21800 }
21801
21802 cs_offset += cs_object->paging_offset;
21803
21804 /*
21805 * cs_object could be backed by a:
21806 * vnode_pager
21807 * apple_protect_pager
21808 * shared_region_pager
21809 * fourk_pager (multiple backing objects -> fail?)
21810 * ask the pager if it has a backing VM object
21811 */
21812 if (!memory_object_backing_object(cs_object->pager,
21813 cs_offset,
21814 &backing_object,
21815 &backing_offset)) {
21816 /* no backing object: cs_object is it */
21817 break;
21818 }
21819
21820 /* look down the backing object's shadow chain */
21821 vm_object_lock_shared(backing_object);
21822 vm_object_unlock(cs_object);
21823 cs_object = backing_object;
21824 cs_offset = backing_offset;
21825 }
21826
21827 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
21828 if (cs_vnode == NULL) {
21829 /* no vnode, no code signatures to associate */
21830 cs_ret = KERN_SUCCESS;
21831 } else {
21832 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
21833 &cs_blobs);
21834 assert(cs_ret == KERN_SUCCESS);
21835 cs_ret = cs_associate_blob_with_mapping(map->pmap,
21836 entry->vme_start,
21837 (entry->vme_end - entry->vme_start),
21838 cs_offset,
21839 cs_blobs);
21840 }
21841 vm_object_unlock(cs_object);
21842 cs_object = VM_OBJECT_NULL;
21843
21844 done:
21845 if (cs_ret == KERN_SUCCESS) {
21846 DTRACE_VM2(vm_map_entry_cs_associate_success,
21847 vm_map_offset_t, entry->vme_start,
21848 vm_map_offset_t, entry->vme_end);
21849 if (vm_map_executable_immutable) {
21850 /*
21851 * Prevent this executable
21852 * mapping from being unmapped
21853 * or modified.
21854 */
21855 entry->vme_permanent = TRUE;
21856 }
21857 /*
21858 * pmap says it will validate the
21859 * code-signing validity of pages
21860 * faulted in via this mapping, so
21861 * this map entry should be marked so
21862 * that vm_fault() bypasses code-signing
21863 * validation for faults coming through
21864 * this mapping.
21865 */
21866 entry->csm_associated = TRUE;
21867 } else if (cs_ret == KERN_NOT_SUPPORTED) {
21868 /*
21869 * pmap won't check the code-signing
21870 * validity of pages faulted in via
21871 * this mapping, so VM should keep
21872 * doing it.
21873 */
21874 DTRACE_VM3(vm_map_entry_cs_associate_off,
21875 vm_map_offset_t, entry->vme_start,
21876 vm_map_offset_t, entry->vme_end,
21877 int, cs_ret);
21878 } else {
21879 /*
21880 * A real error: do not allow
21881 * execution in this mapping.
21882 */
21883 DTRACE_VM3(vm_map_entry_cs_associate_failure,
21884 vm_map_offset_t, entry->vme_start,
21885 vm_map_offset_t, entry->vme_end,
21886 int, cs_ret);
21887 if (vmk_flags.vmkf_overwrite_immutable) {
21888 /*
21889 * We can get here when we remap an apple_protect pager
21890 * on top of an already cs_associated executable mapping
21891 * with the same code signatures, so we don't want to
21892 * lose VM_PROT_EXECUTE in that case...
21893 */
21894 } else {
21895 entry->protection &= ~VM_PROT_ALLEXEC;
21896 entry->max_protection &= ~VM_PROT_ALLEXEC;
21897 }
21898 }
21899
21900 return cs_ret;
21901 }
21902
21903 #endif /* CODE_SIGNING_MONITOR */
21904
21905 /*
21906 * FORKED CORPSE FOOTPRINT
21907 *
21908 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21909 * empty since it never ran and never got to fault in any pages.
21910 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21911 * a forked corpse would therefore return very little information.
21912 *
21913 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21914 * to vm_map_fork() to collect footprint information from the original VM map
21915 * and its pmap, and store it in the forked corpse's VM map. That information
21916 * is stored in place of the VM map's "hole list" since we'll never need to
21917 * lookup for holes in the corpse's map.
21918 *
21919 * The corpse's footprint info looks like this:
21920 *
21921 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21922 * as follows:
21923 * +---------------------------------------+
21924 * header-> | cf_size |
21925 * +-------------------+-------------------+
21926 * | cf_last_region | cf_last_zeroes |
21927 * +-------------------+-------------------+
21928 * region1-> | cfr_vaddr |
21929 * +-------------------+-------------------+
21930 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21931 * +---------------------------------------+
21932 * | d4 | d5 | ... |
21933 * +---------------------------------------+
21934 * | ... |
21935 * +-------------------+-------------------+
21936 * | dy | dz | na | na | cfr_vaddr... | <-region2
21937 * +-------------------+-------------------+
21938 * | cfr_vaddr (ctd) | cfr_num_pages |
21939 * +---------------------------------------+
21940 * | d0 | d1 ... |
21941 * +---------------------------------------+
21942 * ...
21943 * +---------------------------------------+
21944 * last region-> | cfr_vaddr |
21945 * +---------------------------------------+
21946 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21947 * +---------------------------------------+
21948 * ...
21949 * +---------------------------------------+
21950 * | dx | dy | dz | na | na | na | na | na |
21951 * +---------------------------------------+
21952 *
21953 * where:
21954 * cf_size: total size of the buffer (rounded to page size)
21955 * cf_last_region: offset in the buffer of the last "region" sub-header
21956 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21957 * of last region
21958 * cfr_vaddr: virtual address of the start of the covered "region"
21959 * cfr_num_pages: number of pages in the covered "region"
21960 * d*: disposition of the page at that virtual address
21961 * Regions in the buffer are word-aligned.
21962 *
21963 * We estimate the size of the buffer based on the number of memory regions
21964 * and the virtual size of the address space. While copying each memory region
21965 * during vm_map_fork(), we also collect the footprint info for that region
21966 * and store it in the buffer, packing it as much as possible (coalescing
21967 * contiguous memory regions to avoid having too many region headers and
21968 * avoiding long streaks of "zero" page dispositions by splitting footprint
21969 * "regions", so the number of regions in the footprint buffer might not match
21970 * the number of memory regions in the address space.
21971 *
21972 * We also have to copy the original task's "nonvolatile" ledgers since that's
21973 * part of the footprint and will need to be reported to any tool asking for
21974 * the footprint information of the forked corpse.
21975 */
21976
21977 uint64_t vm_map_corpse_footprint_count = 0;
21978 uint64_t vm_map_corpse_footprint_size_avg = 0;
21979 uint64_t vm_map_corpse_footprint_size_max = 0;
21980 uint64_t vm_map_corpse_footprint_full = 0;
21981 uint64_t vm_map_corpse_footprint_no_buf = 0;
21982
21983 struct vm_map_corpse_footprint_header {
21984 vm_size_t cf_size; /* allocated buffer size */
21985 uint32_t cf_last_region; /* offset of last region in buffer */
21986 union {
21987 uint32_t cfu_last_zeroes; /* during creation:
21988 * number of "zero" dispositions at
21989 * end of last region */
21990 uint32_t cfu_hint_region; /* during lookup:
21991 * offset of last looked up region */
21992 #define cf_last_zeroes cfu.cfu_last_zeroes
21993 #define cf_hint_region cfu.cfu_hint_region
21994 } cfu;
21995 };
21996 typedef uint8_t cf_disp_t;
21997 struct vm_map_corpse_footprint_region {
21998 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21999 uint32_t cfr_num_pages; /* number of pages in this "region" */
22000 cf_disp_t cfr_disposition[0]; /* disposition of each page */
22001 } __attribute__((packed));
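/*
 * Illustrative sketch (not compiled; "buf", "hdr", "region", "offset" and
 * "va" are hypothetical locals): how a region's disposition array and the
 * next region header are located in the buffer described above.  This
 * mirrors the arithmetic used by the collection and lookup code below.
 *
 *	struct vm_map_corpse_footprint_header *hdr = buf;
 *	struct vm_map_corpse_footprint_region *region;
 *	uint32_t offset;
 *
 *	// the first region sits right after the header
 *	offset = sizeof(*hdr);
 *	region = (struct vm_map_corpse_footprint_region *)((char *)hdr + offset);
 *
 *	// page dispositions immediately follow the region header:
 *	// the disposition of "va" is
 *	//   region->cfr_disposition[(va - region->cfr_vaddr) / effective_page_size]
 *
 *	// next region: skip this header and its dispositions, then word-align
 *	offset += sizeof(*region) + region->cfr_num_pages * sizeof(cf_disp_t);
 *	offset = roundup(offset, sizeof(int));
 *	// hdr->cf_last_region is the offset of the last region, so stop
 *	// advancing once "offset" reaches it
 */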
22002
22003 static cf_disp_t
22004 vm_page_disposition_to_cf_disp(
22005 int disposition)
22006 {
22007 assert(sizeof(cf_disp_t) == 1);
22008 /* relocate bits that don't fit in a "uint8_t" */
22009 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
22010 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
22011 }
22012 /* cast gets rid of extra bits */
22013 return (cf_disp_t) disposition;
22014 }
22015
22016 static int
22017 vm_page_cf_disp_to_disposition(
22018 cf_disp_t cf_disp)
22019 {
22020 int disposition;
22021
22022 assert(sizeof(cf_disp_t) == 1);
22023 disposition = (int) cf_disp;
22024 /* move relocated bits back in place */
22025 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
22026 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
22027 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
22028 }
22029 return disposition;
22030 }
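/*
 * Round-trip example (illustrative): VM_PAGE_QUERY_PAGE_REUSABLE does not
 * fit in a "uint8_t", so the encoder above stashes it in the FICTITIOUS bit
 * (which this footprint path does not otherwise need) and the decoder moves
 * it back; other high bits are simply dropped by the cast.
 *
 *	cf_disp_t cf = vm_page_disposition_to_cf_disp(VM_PAGE_QUERY_PAGE_REUSABLE);
 *	// "cf" now carries VM_PAGE_QUERY_PAGE_FICTITIOUS and fits in one byte
 *	int disp = vm_page_cf_disp_to_disposition(cf);
 *	// "disp" is VM_PAGE_QUERY_PAGE_REUSABLE again
 */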
22031
22032 /*
22033 * vm_map_corpse_footprint_new_region:
22034 * closes the current footprint "region" and creates a new one
22035 *
22036 * Returns NULL if there's not enough space in the buffer for a new region.
22037 */
22038 static struct vm_map_corpse_footprint_region *
22039 vm_map_corpse_footprint_new_region(
22040 struct vm_map_corpse_footprint_header *footprint_header)
22041 {
22042 uintptr_t footprint_edge;
22043 uint32_t new_region_offset;
22044 struct vm_map_corpse_footprint_region *footprint_region;
22045 struct vm_map_corpse_footprint_region *new_footprint_region;
22046
22047 footprint_edge = ((uintptr_t)footprint_header +
22048 footprint_header->cf_size);
22049 footprint_region = ((struct vm_map_corpse_footprint_region *)
22050 ((char *)footprint_header +
22051 footprint_header->cf_last_region));
22052 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
22053 footprint_edge);
22054
22055 /* get rid of trailing zeroes in the last region */
22056 assert(footprint_region->cfr_num_pages >=
22057 footprint_header->cf_last_zeroes);
22058 footprint_region->cfr_num_pages -=
22059 footprint_header->cf_last_zeroes;
22060 footprint_header->cf_last_zeroes = 0;
22061
22062 /* reuse this region if it's now empty */
22063 if (footprint_region->cfr_num_pages == 0) {
22064 return footprint_region;
22065 }
22066
22067 /* compute offset of new region */
22068 new_region_offset = footprint_header->cf_last_region;
22069 new_region_offset += sizeof(*footprint_region);
22070 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
22071 new_region_offset = roundup(new_region_offset, sizeof(int));
22072
22073 /* check if we're going over the edge */
22074 if (((uintptr_t)footprint_header +
22075 new_region_offset +
22076 sizeof(*footprint_region)) >=
22077 footprint_edge) {
22078 /* over the edge: no new region */
22079 return NULL;
22080 }
22081
22082 /* adjust offset of last region in header */
22083 footprint_header->cf_last_region = new_region_offset;
22084
22085 new_footprint_region = (struct vm_map_corpse_footprint_region *)
22086 ((char *)footprint_header +
22087 footprint_header->cf_last_region);
22088 new_footprint_region->cfr_vaddr = 0;
22089 new_footprint_region->cfr_num_pages = 0;
22090 /* caller needs to initialize new region */
22091
22092 return new_footprint_region;
22093 }
22094
22095 /*
22096 * vm_map_corpse_footprint_collect:
22097 * collect footprint information for "old_entry" in "old_map" and
22098 * store it in "new_map"'s vmmap_corpse_footprint buffer.
22099 */
22100 kern_return_t
22101 vm_map_corpse_footprint_collect(
22102 vm_map_t old_map,
22103 vm_map_entry_t old_entry,
22104 vm_map_t new_map)
22105 {
22106 vm_map_offset_t va;
22107 kern_return_t kr;
22108 struct vm_map_corpse_footprint_header *footprint_header;
22109 struct vm_map_corpse_footprint_region *footprint_region;
22110 struct vm_map_corpse_footprint_region *new_footprint_region;
22111 cf_disp_t *next_disp_p;
22112 uintptr_t footprint_edge;
22113 uint32_t num_pages_tmp;
22114 int effective_page_size;
22115
22116 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
22117
22118 va = old_entry->vme_start;
22119
22120 vm_map_lock_assert_exclusive(old_map);
22121 vm_map_lock_assert_exclusive(new_map);
22122
22123 assert(new_map->has_corpse_footprint);
22124 assert(!old_map->has_corpse_footprint);
22125 if (!new_map->has_corpse_footprint ||
22126 old_map->has_corpse_footprint) {
22127 /*
22128 * This can only transfer footprint info from a
22129 * map with a live pmap to a map with a corpse footprint.
22130 */
22131 return KERN_NOT_SUPPORTED;
22132 }
22133
22134 if (new_map->vmmap_corpse_footprint == NULL) {
22135 vm_offset_t buf;
22136 vm_size_t buf_size;
22137
22138 buf = 0;
22139 buf_size = (sizeof(*footprint_header) +
22140 (old_map->hdr.nentries
22141 *
22142 (sizeof(*footprint_region) +
22143 +3)) /* potential alignment for each region */
22144 +
22145 ((old_map->size / effective_page_size)
22146 *
22147 sizeof(cf_disp_t))); /* disposition for each page */
22148 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
22149 buf_size = round_page(buf_size);
22150
22151 /* limit buffer to 1 page to validate overflow detection */
22152 // buf_size = PAGE_SIZE;
22153
22154 /* limit size to a somewhat sane amount */
22155 #if XNU_TARGET_OS_OSX
22156 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
22157 #else /* XNU_TARGET_OS_OSX */
22158 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
22159 #endif /* XNU_TARGET_OS_OSX */
22160 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
22161 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
22162 }
22163
22164 /*
22165 * Allocate the pageable buffer (with a trailing guard page).
22166 * It will be zero-filled on demand.
22167 */
22168 kr = kmem_alloc(kernel_map, &buf, buf_size + PAGE_SIZE,
22169 KMA_DATA | KMA_PAGEABLE | KMA_GUARD_LAST,
22170 VM_KERN_MEMORY_DIAG);
22171 if (kr != KERN_SUCCESS) {
22172 vm_map_corpse_footprint_no_buf++;
22173 return kr;
22174 }
22175
22176 /* initialize header and 1st region */
22177 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
22178 new_map->vmmap_corpse_footprint = footprint_header;
22179
22180 footprint_header->cf_size = buf_size;
22181 footprint_header->cf_last_region =
22182 sizeof(*footprint_header);
22183 footprint_header->cf_last_zeroes = 0;
22184
22185 footprint_region = (struct vm_map_corpse_footprint_region *)
22186 ((char *)footprint_header +
22187 footprint_header->cf_last_region);
22188 footprint_region->cfr_vaddr = 0;
22189 footprint_region->cfr_num_pages = 0;
22190 } else {
22191 /* retrieve header and last region */
22192 footprint_header = (struct vm_map_corpse_footprint_header *)
22193 new_map->vmmap_corpse_footprint;
22194 footprint_region = (struct vm_map_corpse_footprint_region *)
22195 ((char *)footprint_header +
22196 footprint_header->cf_last_region);
22197 }
22198 footprint_edge = ((uintptr_t)footprint_header +
22199 footprint_header->cf_size);
22200
22201 if ((footprint_region->cfr_vaddr +
22202 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
22203 effective_page_size))
22204 != old_entry->vme_start) {
22205 uint64_t num_pages_delta, num_pages_delta_size;
22206 uint32_t region_offset_delta_size;
22207
22208 /*
22209 * Not the next contiguous virtual address:
22210 * start a new region or store "zero" dispositions for
22211 * the missing pages?
22212 */
22213 /* size of gap in actual page dispositions */
22214 num_pages_delta = ((old_entry->vme_start -
22215 footprint_region->cfr_vaddr) / effective_page_size)
22216 - footprint_region->cfr_num_pages;
22217 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
22218 /* size of gap as a new footprint region header */
22219 region_offset_delta_size =
22220 (sizeof(*footprint_region) +
22221 roundup(((footprint_region->cfr_num_pages -
22222 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
22223 sizeof(int)) -
22224 ((footprint_region->cfr_num_pages -
22225 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
22226 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
22227 if (region_offset_delta_size < num_pages_delta_size ||
22228 os_add3_overflow(footprint_region->cfr_num_pages,
22229 (uint32_t) num_pages_delta,
22230 1,
22231 &num_pages_tmp)) {
22232 /*
22233 * Storing data for this gap would take more space
22234 * than inserting a new footprint region header:
22235 * let's start a new region and save space. If it's a
22236 * tie, let's avoid using a new region, since that
22237 * would require more region hops to find the right
22238 * range during lookups.
22239 *
22240 * If the current region's cfr_num_pages would overflow
22241 * if we added "zero" page dispositions for the gap,
22242 * no choice but to start a new region.
22243 */
22244 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
22245 new_footprint_region =
22246 vm_map_corpse_footprint_new_region(footprint_header);
22247 /* check that we're not going over the edge */
22248 if (new_footprint_region == NULL) {
22249 goto over_the_edge;
22250 }
22251 footprint_region = new_footprint_region;
22252 /* initialize new region as empty */
22253 footprint_region->cfr_vaddr = old_entry->vme_start;
22254 footprint_region->cfr_num_pages = 0;
22255 } else {
22256 /*
22257 * Store "zero" page dispositions for the missing
22258 * pages.
22259 */
22260 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
22261 for (; num_pages_delta > 0; num_pages_delta--) {
22262 next_disp_p = (cf_disp_t *)
22263 ((uintptr_t) footprint_region +
22264 sizeof(*footprint_region));
22265 next_disp_p += footprint_region->cfr_num_pages;
22266 /* check that we're not going over the edge */
22267 if ((uintptr_t)next_disp_p >= footprint_edge) {
22268 goto over_the_edge;
22269 }
22270 /* store "zero" disposition for this gap page */
22271 footprint_region->cfr_num_pages++;
22272 *next_disp_p = (cf_disp_t) 0;
22273 footprint_header->cf_last_zeroes++;
22274 }
22275 }
22276 }
22277
22278 for (va = old_entry->vme_start;
22279 va < old_entry->vme_end;
22280 va += effective_page_size) {
22281 int disposition;
22282 cf_disp_t cf_disp;
22283
22284 vm_map_footprint_query_page_info(old_map,
22285 old_entry,
22286 va,
22287 &disposition);
22288 cf_disp = vm_page_disposition_to_cf_disp(disposition);
22289
22290 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
22291
22292 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
22293 /*
22294 * Ignore "zero" dispositions at start of
22295 * region: just move start of region.
22296 */
22297 footprint_region->cfr_vaddr += effective_page_size;
22298 continue;
22299 }
22300
22301 /* would region's cfr_num_pages overflow? */
22302 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
22303 &num_pages_tmp)) {
22304 /* overflow: create a new region */
22305 new_footprint_region =
22306 vm_map_corpse_footprint_new_region(
22307 footprint_header);
22308 if (new_footprint_region == NULL) {
22309 goto over_the_edge;
22310 }
22311 footprint_region = new_footprint_region;
22312 footprint_region->cfr_vaddr = va;
22313 footprint_region->cfr_num_pages = 0;
22314 }
22315
22316 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
22317 sizeof(*footprint_region));
22318 next_disp_p += footprint_region->cfr_num_pages;
22319 /* check that we're not going over the edge */
22320 if ((uintptr_t)next_disp_p >= footprint_edge) {
22321 goto over_the_edge;
22322 }
22323 /* store this disposition */
22324 *next_disp_p = cf_disp;
22325 footprint_region->cfr_num_pages++;
22326
22327 if (cf_disp != 0) {
22328 /* non-zero disp: break the current zero streak */
22329 footprint_header->cf_last_zeroes = 0;
22330 /* done */
22331 continue;
22332 }
22333
22334 /* zero disp: add to the current streak of zeroes */
22335 footprint_header->cf_last_zeroes++;
22336 if ((footprint_header->cf_last_zeroes +
22337 roundup(((footprint_region->cfr_num_pages -
22338 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
22339 (sizeof(int) - 1),
22340 sizeof(int))) <
22341 (sizeof(*footprint_header))) {
22342 /*
22343 * There are not enough trailing "zero" dispositions
22344 * (+ the extra padding we would need for the previous
22345 * region); creating a new region would not save space
22346 * at this point, so let's keep this "zero" disposition
22347 * in this region and reconsider later.
22348 */
22349 continue;
22350 }
22351 /*
22352 * Create a new region to avoid having too many consecutive
22353 * "zero" dispositions.
22354 */
22355 new_footprint_region =
22356 vm_map_corpse_footprint_new_region(footprint_header);
22357 if (new_footprint_region == NULL) {
22358 goto over_the_edge;
22359 }
22360 footprint_region = new_footprint_region;
22361 /* initialize the new region as empty ... */
22362 footprint_region->cfr_num_pages = 0;
22363 /* ... and skip this "zero" disp */
22364 footprint_region->cfr_vaddr = va + effective_page_size;
22365 }
22366
22367 return KERN_SUCCESS;
22368
22369 over_the_edge:
22370 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
22371 vm_map_corpse_footprint_full++;
22372 return KERN_RESOURCE_SHORTAGE;
22373 }
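/*
 * Typical collection flow (sketch, assuming the VM_MAP_FORK_CORPSE_FOOTPRINT
 * path of vm_map_fork(); error handling elided).  Both maps must be held
 * exclusively, as asserted above.
 *
 *	vm_map_lock(old_map);
 *	vm_map_lock(new_map);
 *	for (entry = vm_map_first_entry(old_map);
 *	     entry != vm_map_to_entry(old_map);
 *	     entry = entry->vme_next) {
 *		kr = vm_map_corpse_footprint_collect(old_map, entry, new_map);
 *		// KERN_RESOURCE_SHORTAGE: the (size-capped) buffer filled up
 *		// and the collected footprint will simply be truncated
 *	}
 *	vm_map_corpse_footprint_collect_done(new_map);
 */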
22374
22375 /*
22376 * vm_map_corpse_footprint_collect_done:
22377 * completes the footprint collection by getting rid of any remaining
22378 * trailing "zero" dispositions and trimming the unused part of the
22379 * kernel buffer
22380 */
22381 void
22382 vm_map_corpse_footprint_collect_done(
22383 vm_map_t new_map)
22384 {
22385 struct vm_map_corpse_footprint_header *footprint_header;
22386 struct vm_map_corpse_footprint_region *footprint_region;
22387 vm_size_t buf_size, actual_size;
22388 kern_return_t kr;
22389
22390 assert(new_map->has_corpse_footprint);
22391 if (!new_map->has_corpse_footprint ||
22392 new_map->vmmap_corpse_footprint == NULL) {
22393 return;
22394 }
22395
22396 footprint_header = (struct vm_map_corpse_footprint_header *)
22397 new_map->vmmap_corpse_footprint;
22398 buf_size = footprint_header->cf_size;
22399
22400 footprint_region = (struct vm_map_corpse_footprint_region *)
22401 ((char *)footprint_header +
22402 footprint_header->cf_last_region);
22403
22404 /* get rid of trailing zeroes in last region */
22405 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
22406 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
22407 footprint_header->cf_last_zeroes = 0;
22408
22409 actual_size = (vm_size_t)(footprint_header->cf_last_region +
22410 sizeof(*footprint_region) +
22411 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
22412
22413 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
22414 vm_map_corpse_footprint_size_avg =
22415 (((vm_map_corpse_footprint_size_avg *
22416 vm_map_corpse_footprint_count) +
22417 actual_size) /
22418 (vm_map_corpse_footprint_count + 1));
22419 vm_map_corpse_footprint_count++;
22420 if (actual_size > vm_map_corpse_footprint_size_max) {
22421 vm_map_corpse_footprint_size_max = actual_size;
22422 }
22423
22424 actual_size = round_page(actual_size);
22425 if (buf_size > actual_size) {
22426 kr = vm_deallocate(kernel_map,
22427 ((vm_address_t)footprint_header +
22428 actual_size +
22429 PAGE_SIZE), /* trailing guard page */
22430 (buf_size - actual_size));
22431 assertf(kr == KERN_SUCCESS,
22432 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
22433 footprint_header,
22434 (uint64_t) buf_size,
22435 (uint64_t) actual_size,
22436 kr);
22437 kr = vm_protect(kernel_map,
22438 ((vm_address_t)footprint_header +
22439 actual_size),
22440 PAGE_SIZE,
22441 FALSE, /* set_maximum */
22442 VM_PROT_NONE);
22443 assertf(kr == KERN_SUCCESS,
22444 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
22445 footprint_header,
22446 (uint64_t) buf_size,
22447 (uint64_t) actual_size,
22448 kr);
22449 }
22450
22451 footprint_header->cf_size = actual_size;
22452 }
22453
22454 /*
22455 * vm_map_corpse_footprint_query_page_info:
22456 * retrieves the disposition of the page at virtual address "vaddr"
22457 * in the forked corpse's VM map
22458 *
22459 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
22460 */
22461 kern_return_t
22462 vm_map_corpse_footprint_query_page_info(
22463 vm_map_t map,
22464 vm_map_offset_t va,
22465 int *disposition_p)
22466 {
22467 struct vm_map_corpse_footprint_header *footprint_header;
22468 struct vm_map_corpse_footprint_region *footprint_region;
22469 uint32_t footprint_region_offset;
22470 vm_map_offset_t region_start, region_end;
22471 int disp_idx;
22472 kern_return_t kr;
22473 int effective_page_size;
22474 cf_disp_t cf_disp;
22475
22476 if (!map->has_corpse_footprint) {
22477 *disposition_p = 0;
22478 kr = KERN_INVALID_ARGUMENT;
22479 goto done;
22480 }
22481
22482 footprint_header = map->vmmap_corpse_footprint;
22483 if (footprint_header == NULL) {
22484 *disposition_p = 0;
22485 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
22486 kr = KERN_INVALID_ARGUMENT;
22487 goto done;
22488 }
22489
22490 /* start looking at the hint ("cf_hint_region") */
22491 footprint_region_offset = footprint_header->cf_hint_region;
22492
22493 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
22494
22495 lookup_again:
22496 if (footprint_region_offset < sizeof(*footprint_header)) {
22497 /* hint too low: start from 1st region */
22498 footprint_region_offset = sizeof(*footprint_header);
22499 }
22500 if (footprint_region_offset >= footprint_header->cf_last_region) {
22501 /* hint too high: re-start from 1st region */
22502 footprint_region_offset = sizeof(*footprint_header);
22503 }
22504 footprint_region = (struct vm_map_corpse_footprint_region *)
22505 ((char *)footprint_header + footprint_region_offset);
22506 region_start = footprint_region->cfr_vaddr;
22507 region_end = (region_start +
22508 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
22509 effective_page_size));
22510 if (va < region_start &&
22511 footprint_region_offset != sizeof(*footprint_header)) {
22512 /* our range starts before the hint region */
22513
22514 /* reset the hint (in a racy way...) */
22515 footprint_header->cf_hint_region = sizeof(*footprint_header);
22516 /* lookup "va" again from 1st region */
22517 footprint_region_offset = sizeof(*footprint_header);
22518 goto lookup_again;
22519 }
22520
22521 while (va >= region_end) {
22522 if (footprint_region_offset >= footprint_header->cf_last_region) {
22523 break;
22524 }
22525 /* skip the region's header */
22526 footprint_region_offset += sizeof(*footprint_region);
22527 /* skip the region's page dispositions */
22528 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
22529 /* align to next word boundary */
22530 footprint_region_offset =
22531 roundup(footprint_region_offset,
22532 sizeof(int));
22533 footprint_region = (struct vm_map_corpse_footprint_region *)
22534 ((char *)footprint_header + footprint_region_offset);
22535 region_start = footprint_region->cfr_vaddr;
22536 region_end = (region_start +
22537 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
22538 effective_page_size));
22539 }
22540 if (va < region_start || va >= region_end) {
22541 /* page not found */
22542 *disposition_p = 0;
22543 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
22544 kr = KERN_SUCCESS;
22545 goto done;
22546 }
22547
22548 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
22549 footprint_header->cf_hint_region = footprint_region_offset;
22550
22551 /* get page disposition for "va" in this region */
22552 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
22553 cf_disp = footprint_region->cfr_disposition[disp_idx];
22554 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
22555 kr = KERN_SUCCESS;
22556 done:
22557 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
22558 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
22559 DTRACE_VM4(footprint_query_page_info,
22560 vm_map_t, map,
22561 vm_map_offset_t, va,
22562 int, *disposition_p,
22563 kern_return_t, kr);
22564
22565 return kr;
22566 }
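/*
 * Example use (sketch): querying the recorded state of one page in a
 * forked corpse's map.
 *
 *	int disp = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_corpse_footprint_query_page_info(corpse_map, va, &disp);
 *	if (kr == KERN_SUCCESS && (disp & VM_PAGE_QUERY_PAGE_PRESENT)) {
 *		// "va" was resident in the original task when the corpse
 *		// was forked
 *	}
 */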
22567
22568 void
22569 vm_map_corpse_footprint_destroy(
22570 vm_map_t map)
22571 {
22572 if (map->has_corpse_footprint &&
22573 map->vmmap_corpse_footprint != 0) {
22574 struct vm_map_corpse_footprint_header *footprint_header;
22575 vm_size_t buf_size;
22576 kern_return_t kr;
22577
22578 footprint_header = map->vmmap_corpse_footprint;
22579 buf_size = footprint_header->cf_size;
22580 kr = vm_deallocate(kernel_map,
22581 (vm_offset_t) map->vmmap_corpse_footprint,
22582 ((vm_size_t) buf_size
22583 + PAGE_SIZE)); /* trailing guard page */
22584 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
22585 map->vmmap_corpse_footprint = 0;
22586 map->has_corpse_footprint = FALSE;
22587 }
22588 }
22589
22590 /*
22591 * vm_map_copy_footprint_ledgers:
22592 * copies any ledger that's relevant to the memory footprint of "old_task"
22593 * into the forked corpse's task ("new_task")
22594 */
22595 void
22596 vm_map_copy_footprint_ledgers(
22597 task_t old_task,
22598 task_t new_task)
22599 {
22600 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
22601 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
22602 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
22603 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
22604 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
22605 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
22606 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
22607 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
22608 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
22609 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
22610 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
22611 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
22612 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
22613 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
22614 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
22615 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
22616 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
22617 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
22618 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
22619 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
22620 }
22621
22622 /*
22623 * vm_map_copy_ledger:
22624 * copy a single ledger from "old_task" to "new_task"
22625 */
22626 void
22627 vm_map_copy_ledger(
22628 task_t old_task,
22629 task_t new_task,
22630 int ledger_entry)
22631 {
22632 ledger_amount_t old_balance, new_balance, delta;
22633
22634 assert(new_task->map->has_corpse_footprint);
22635 if (!new_task->map->has_corpse_footprint) {
22636 return;
22637 }
22638
22639 /* turn off sanity checks for the ledger we're about to mess with */
22640 ledger_disable_panic_on_negative(new_task->ledger,
22641 ledger_entry);
22642
22643 /* adjust "new_task" to match "old_task" */
22644 ledger_get_balance(old_task->ledger,
22645 ledger_entry,
22646 &old_balance);
22647 ledger_get_balance(new_task->ledger,
22648 ledger_entry,
22649 &new_balance);
22650 if (new_balance == old_balance) {
22651 /* new == old: done */
22652 } else if (new_balance > old_balance) {
22653 /* new > old ==> new -= new - old */
22654 delta = new_balance - old_balance;
22655 ledger_debit(new_task->ledger,
22656 ledger_entry,
22657 delta);
22658 } else {
22659 /* new < old ==> new += old - new */
22660 delta = old_balance - new_balance;
22661 ledger_credit(new_task->ledger,
22662 ledger_entry,
22663 delta);
22664 }
22665 }
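/*
 * Worked example: if "old_task"'s phys_footprint balance is 100 MB and the
 * freshly forked corpse's is 40 MB, the code above credits the corpse's
 * ledger by 60 MB; had the corpse been higher instead, it would have been
 * debited by the difference.  Either way the corpse ends up reporting the
 * same footprint as the original task.
 */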
22666
22667 /*
22668 * vm_map_get_pmap:
22669 * returns the pmap associated with the vm_map
22670 */
22671 pmap_t
22672 vm_map_get_pmap(vm_map_t map)
22673 {
22674 return vm_map_pmap(map);
22675 }
22676
22677 #if CONFIG_MAP_RANGES
22678 static bitmap_t vm_map_user_range_heap_map[BITMAP_LEN(VM_MEMORY_COUNT)];
22679
22680 /*
22681 * vm_map_range_map_init:
22682 * initializes the VM range ID map to enable index lookup
22683 * of user VM ranges based on VM tag from userspace.
22684 */
22685 static void
22686 vm_map_range_map_init(void)
22687 {
22688 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC);
22689 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_HUGE);
22690 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_LARGE);
22691 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_LARGE_REUSED);
22692 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_MEDIUM);
22693 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_PROB_GUARD);
22694 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_SMALL);
22695 bitmap_set(vm_map_user_range_heap_map, VM_MEMORY_MALLOC_TINY);
22696 }
22697
22698 /*
22699 * vm_map_range_configure:
22700 * configures the user vm_map ranges by increasing the maximum VA range of
22701 * the map and carving out a range at the end of VA space (searching backwards
22702 * in the newly expanded map).
22703 */
22704 kern_return_t
22705 vm_map_range_configure(vm_map_t map)
22706 {
22707 vm_map_size_t addr_space_size;
22708 vm_map_offset_t start, end, saved_max, random_addr;
22709 kern_return_t kr;
22710
22711 /* Should not be applying ranges to kernel map or kernel map submaps */
22712 assert(map != kernel_map);
22713 assert(vm_map_pmap(map) != kernel_pmap);
22714
22715 /* save the existing max offset */
22716 vm_map_lock_read(map);
22717 saved_max = vm_map_max(map);
22718 vm_map_unlock_read(map);
22719
22720 /*
22721 * Check that we're not already jumbo'd. If so we cannot guarantee that
22722 * we can set up the ranges safely without interfering with the existing
22723 * map.
22724 */
22725 if (saved_max > vm_compute_max_offset(vm_map_is_64bit(map))) {
22726 return KERN_NO_SPACE;
22727 }
22728
22729 /* expand the default VM space to the largest possible address */
22730 vm_map_set_jumbo(map);
22731
22732 vm_map_lock(map);
22733 addr_space_size = vm_map_max(map) - saved_max;
22734
22735 if (addr_space_size <= VM_MAP_USER_RANGE_MAX) {
22736 vm_map_unlock(map);
22737 return KERN_NO_SPACE;
22738 }
22739
22740 addr_space_size -= VM_MAP_USER_RANGE_MAX;
22741 random_addr = (vm_map_offset_t)random();
22742 random_addr <<= VM_MAP_PAGE_SHIFT(map);
22743 random_addr %= addr_space_size;
22744
22745 /*
22746 * round off the start so we begin on an L2 TT boundary and ensure we have
22747 * at least an ARM_TT_L2_SIZE-sized hole between the existing map range and
22748 * new range(s).
22749 */
22750 start = vm_map_round_page(saved_max + random_addr + 1, ARM_TT_L2_OFFMASK);
22751 end = MIN(vm_map_max(map), start + VM_MAP_USER_RANGE_MAX);
22752 assert(start > saved_max);
22753 assert(end <= vm_map_max(map));
22754
22755 /* default range covers the "normal" heap range */
22756 map->user_range[UMEM_RANGE_ID_DEFAULT].min_address = vm_map_min(map);
22757 map->user_range[UMEM_RANGE_ID_DEFAULT].max_address = saved_max;
22758
22759 /* heap range covers the new extended range */
22760 map->user_range[UMEM_RANGE_ID_HEAP].min_address = start;
22761 map->user_range[UMEM_RANGE_ID_HEAP].max_address = end;
22762
22763 vm_map_unlock(map);
22764
22765 /*
22766 * Poke holes so that ASAN or people listing regions
22767 * do not think this space is free.
22768 */
22769
22770 if (start != saved_max) {
22771 kr = vm_map_enter(map, &saved_max, start - saved_max,
22772 0, VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(), VM_OBJECT_NULL,
22773 0, FALSE, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_DEFAULT);
22774 assert(kr == KERN_SUCCESS);
22775 }
22776
22777 if (end != vm_map_max(map)) {
22778 kr = vm_map_enter(map, &end, vm_map_max(map) - end,
22779 0, VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(), VM_OBJECT_NULL,
22780 0, FALSE, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_DEFAULT);
22781 assert(kr == KERN_SUCCESS);
22782 }
22783
22784 vm_map_lock(map);
22785
22786 map->uses_user_ranges = true;
22787
22788 vm_map_unlock(map);
22789
22790 return KERN_SUCCESS;
22791 }
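/*
 * Resulting address-space layout (sketch; the arithmetic is simplified and
 * the range IDs are the ones set up above):
 *
 *	saved_max = vm_map_max(map) before vm_map_set_jumbo()
 *	jumbo_max = vm_map_max(map) after  vm_map_set_jumbo()
 *	start     = saved_max + (randomized offset), rounded up to an L2 TT boundary
 *	end       = MIN(jumbo_max, start + VM_MAP_USER_RANGE_MAX)
 *
 *	[ vm_map_min(map) .. saved_max )  UMEM_RANGE_ID_DEFAULT ("normal" heap)
 *	[ saved_max ........ start     )  permanent VM_PROT_NONE hole
 *	[ start ............ end       )  UMEM_RANGE_ID_HEAP (carved-out heap range)
 *	[ end .............. jumbo_max )  permanent VM_PROT_NONE hole
 */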
22792
22793 /*
22794 * vm_map_range_fork:
22795 * clones the array of ranges from old_map to new_map in support
22796 * of a VM map fork.
22797 */
22798 void
22799 vm_map_range_fork(vm_map_t new_map, vm_map_t old_map)
22800 {
22801 if (!old_map->uses_user_ranges) {
22802 /* nothing to do */
22803 return;
22804 }
22805
22806 for (size_t i = 0; i < UMEM_RANGE_COUNT; i++) {
22807 new_map->user_range[i] = old_map->user_range[i];
22808 }
22809
22810 new_map->uses_user_ranges = true;
22811 }
22812
22813 /*
22814 * vm_map_get_user_range:
22815 * copy the VM user range for the given VM map and range ID.
22816 */
22817 kern_return_t
22818 vm_map_get_user_range(
22819 vm_map_t map,
22820 vm_map_range_id_t range_id,
22821 mach_vm_range_t range)
22822 {
22823 if (map == NULL ||
22824 !map->uses_user_ranges ||
22825 range_id > UMEM_RANGE_ID_MAX ||
22826 range == NULL) {
22827 return KERN_INVALID_ARGUMENT;
22828 }
22829
22830 *range = map->user_range[range_id];
22831 return KERN_SUCCESS;
22832 }
22833
22834 static vm_map_range_id_t
22835 vm_map_user_range_resolve(
22836 vm_map_t map,
22837 mach_vm_address_t addr,
22838 mach_vm_size_t size,
22839 mach_vm_range_t range)
22840 {
22841 vm_map_lock_assert_held(map);
22842
22843 for (vm_map_range_id_t i = 0; i < UMEM_RANGE_COUNT; i++) {
22844 mach_vm_range_t r = &map->user_range[i];
22845
22846 if (mach_vm_range_contains(r, addr, size)) {
22847 if (range) {
22848 *range = *r;
22849 }
22850 return i;
22851 }
22852 }
22853
22854 if (range) {
22855 range->min_address = range->max_address = 0;
22856 }
22857 return UMEM_RANGE_ID_DEFAULT;
22858 }
22859
22860 #endif /* CONFIG_MAP_RANGES */
22861
22862 void
22863 vm_map_kernel_flags_update_range_id(vm_map_kernel_flags_t *vmkf, vm_map_t map)
22864 {
22865 if (map == kernel_map) {
22866 if (vmkf->vmkf_range_id == KMEM_RANGE_ID_NONE) {
22867 vmkf->vmkf_range_id = KMEM_RANGE_ID_DATA;
22868 }
22869 #if CONFIG_MAP_RANGES
22870 } else if (vmkf->vm_tag < VM_MEMORY_COUNT &&
22871 vmkf->vmkf_range_id == UMEM_RANGE_ID_DEFAULT &&
22872 bitmap_test(vm_map_user_range_heap_map, vmkf->vm_tag)) {
22873 vmkf->vmkf_range_id = UMEM_RANGE_ID_HEAP;
22874 #endif /* CONFIG_MAP_RANGES */
22875 }
22876 }
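/*
 * Example (sketch): with CONFIG_MAP_RANGES, a user allocation tagged as
 * malloc memory, e.g.
 *
 *	mach_vm_address_t addr = 0;
 *	kr = mach_vm_allocate(mach_task_self(), &addr, size,
 *	    VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_MALLOC_SMALL));
 *
 * has its range ID rewritten to UMEM_RANGE_ID_HEAP here because that tag is
 * set in vm_map_user_range_heap_map, so (on a map configured with user
 * ranges) the allocation lands in the carved-out heap range rather than in
 * the default range.
 */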
22877
22878 /*
22879 * vm_map_entry_has_device_pager:
22880 * Check if the vm map entry specified by the virtual address has a device pager.
22881 * If the vm map entry does not exist or if the map is NULL, this returns FALSE.
22882 */
22883 boolean_t
22884 vm_map_entry_has_device_pager(vm_map_t map, vm_map_offset_t vaddr)
22885 {
22886 vm_map_entry_t entry;
22887 vm_object_t object;
22888 boolean_t result;
22889
22890 if (map == NULL) {
22891 return FALSE;
22892 }
22893
22894 vm_map_lock(map);
22895 while (TRUE) {
22896 if (!vm_map_lookup_entry(map, vaddr, &entry)) {
22897 result = FALSE;
22898 break;
22899 }
22900 if (entry->is_sub_map) {
22901 // Check the submap
22902 vm_map_t submap = VME_SUBMAP(entry);
22903 assert(submap != NULL);
22904 vm_map_lock(submap);
22905 vm_map_unlock(map);
22906 map = submap;
22907 continue;
22908 }
22909 object = VME_OBJECT(entry);
22910 if (object != NULL && object->pager != NULL && is_device_pager_ops(object->pager->mo_pager_ops)) {
22911 result = TRUE;
22912 break;
22913 }
22914 result = FALSE;
22915 break;
22916 }
22917
22918 vm_map_unlock(map);
22919 return result;
22920 }
22921
22922
22923 #if MACH_ASSERT
22924
22925 extern int pmap_ledgers_panic;
22926 extern int pmap_ledgers_panic_leeway;
22927
22928 #define LEDGER_DRIFT(__LEDGER) \
22929 int __LEDGER##_over; \
22930 ledger_amount_t __LEDGER##_over_total; \
22931 ledger_amount_t __LEDGER##_over_max; \
22932 int __LEDGER##_under; \
22933 ledger_amount_t __LEDGER##_under_total; \
22934 ledger_amount_t __LEDGER##_under_max
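/*
 * For reference, LEDGER_DRIFT(phys_footprint) expands to:
 *
 *	int phys_footprint_over;
 *	ledger_amount_t phys_footprint_over_total;
 *	ledger_amount_t phys_footprint_over_max;
 *	int phys_footprint_under;
 *	ledger_amount_t phys_footprint_under_total;
 *	ledger_amount_t phys_footprint_under_max
 *
 * i.e. per-ledger counts of how many checked pmaps were over/under, plus the
 * cumulative and worst-case drift observed for that ledger.
 */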
22935
22936 struct {
22937 uint64_t num_pmaps_checked;
22938
22939 LEDGER_DRIFT(phys_footprint);
22940 LEDGER_DRIFT(internal);
22941 LEDGER_DRIFT(internal_compressed);
22942 LEDGER_DRIFT(external);
22943 LEDGER_DRIFT(reusable);
22944 LEDGER_DRIFT(iokit_mapped);
22945 LEDGER_DRIFT(alternate_accounting);
22946 LEDGER_DRIFT(alternate_accounting_compressed);
22947 LEDGER_DRIFT(page_table);
22948 LEDGER_DRIFT(purgeable_volatile);
22949 LEDGER_DRIFT(purgeable_nonvolatile);
22950 LEDGER_DRIFT(purgeable_volatile_compressed);
22951 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
22952 LEDGER_DRIFT(tagged_nofootprint);
22953 LEDGER_DRIFT(tagged_footprint);
22954 LEDGER_DRIFT(tagged_nofootprint_compressed);
22955 LEDGER_DRIFT(tagged_footprint_compressed);
22956 LEDGER_DRIFT(network_volatile);
22957 LEDGER_DRIFT(network_nonvolatile);
22958 LEDGER_DRIFT(network_volatile_compressed);
22959 LEDGER_DRIFT(network_nonvolatile_compressed);
22960 LEDGER_DRIFT(media_nofootprint);
22961 LEDGER_DRIFT(media_footprint);
22962 LEDGER_DRIFT(media_nofootprint_compressed);
22963 LEDGER_DRIFT(media_footprint_compressed);
22964 LEDGER_DRIFT(graphics_nofootprint);
22965 LEDGER_DRIFT(graphics_footprint);
22966 LEDGER_DRIFT(graphics_nofootprint_compressed);
22967 LEDGER_DRIFT(graphics_footprint_compressed);
22968 LEDGER_DRIFT(neural_nofootprint);
22969 LEDGER_DRIFT(neural_footprint);
22970 LEDGER_DRIFT(neural_nofootprint_compressed);
22971 LEDGER_DRIFT(neural_footprint_compressed);
22972 } pmap_ledgers_drift;
22973
22974 void
22975 vm_map_pmap_check_ledgers(
22976 pmap_t pmap,
22977 ledger_t ledger,
22978 int pid,
22979 char *procname)
22980 {
22981 ledger_amount_t bal;
22982 boolean_t do_panic;
22983
22984 do_panic = FALSE;
22985
22986 pmap_ledgers_drift.num_pmaps_checked++;
22987
22988 #define LEDGER_CHECK_BALANCE(__LEDGER) \
22989 MACRO_BEGIN \
22990 int panic_on_negative = TRUE; \
22991 ledger_get_balance(ledger, \
22992 task_ledgers.__LEDGER, \
22993 &bal); \
22994 ledger_get_panic_on_negative(ledger, \
22995 task_ledgers.__LEDGER, \
22996 &panic_on_negative); \
22997 if (bal != 0) { \
22998 if (panic_on_negative || \
22999 (pmap_ledgers_panic && \
23000 pmap_ledgers_panic_leeway > 0 && \
23001 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
23002 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
23003 do_panic = TRUE; \
23004 } \
23005 printf("LEDGER BALANCE proc %d (%s) " \
23006 "\"%s\" = %lld\n", \
23007 pid, procname, #__LEDGER, bal); \
23008 if (bal > 0) { \
23009 pmap_ledgers_drift.__LEDGER##_over++; \
23010 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
23011 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
23012 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
23013 } \
23014 } else if (bal < 0) { \
23015 pmap_ledgers_drift.__LEDGER##_under++; \
23016 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
23017 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
23018 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
23019 } \
23020 } \
23021 } \
23022 MACRO_END
23023
23024 LEDGER_CHECK_BALANCE(phys_footprint);
23025 LEDGER_CHECK_BALANCE(internal);
23026 LEDGER_CHECK_BALANCE(internal_compressed);
23027 LEDGER_CHECK_BALANCE(external);
23028 LEDGER_CHECK_BALANCE(reusable);
23029 LEDGER_CHECK_BALANCE(iokit_mapped);
23030 LEDGER_CHECK_BALANCE(alternate_accounting);
23031 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
23032 LEDGER_CHECK_BALANCE(page_table);
23033 LEDGER_CHECK_BALANCE(purgeable_volatile);
23034 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
23035 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
23036 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
23037 LEDGER_CHECK_BALANCE(tagged_nofootprint);
23038 LEDGER_CHECK_BALANCE(tagged_footprint);
23039 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
23040 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
23041 LEDGER_CHECK_BALANCE(network_volatile);
23042 LEDGER_CHECK_BALANCE(network_nonvolatile);
23043 LEDGER_CHECK_BALANCE(network_volatile_compressed);
23044 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
23045 LEDGER_CHECK_BALANCE(media_nofootprint);
23046 LEDGER_CHECK_BALANCE(media_footprint);
23047 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
23048 LEDGER_CHECK_BALANCE(media_footprint_compressed);
23049 LEDGER_CHECK_BALANCE(graphics_nofootprint);
23050 LEDGER_CHECK_BALANCE(graphics_footprint);
23051 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
23052 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
23053 LEDGER_CHECK_BALANCE(neural_nofootprint);
23054 LEDGER_CHECK_BALANCE(neural_footprint);
23055 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
23056 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
23057
23058 if (do_panic) {
23059 if (pmap_ledgers_panic) {
23060 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
23061 pmap, pid, procname);
23062 } else {
23063 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
23064 pmap, pid, procname);
23065 }
23066 }
23067 }
23068
23069 void
23070 vm_map_pmap_set_process(
23071 vm_map_t map,
23072 int pid,
23073 char *procname)
23074 {
23075 pmap_set_process(vm_map_pmap(map), pid, procname);
23076 }
23077
23078 #endif /* MACH_ASSERT */
23079